File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/LoopInterchange.cpp |
Warning: | line 1576, column 7 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- LoopInterchange.cpp - Loop interchange pass-------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This Pass handles loop interchange transform. | |||
10 | // This pass interchanges loops to provide a more cache-friendly memory access | |||
11 | // patterns. | |||
12 | // | |||
13 | //===----------------------------------------------------------------------===// | |||
14 | ||||
15 | #include "llvm/Transforms/Scalar/LoopInterchange.h" | |||
16 | #include "llvm/ADT/STLExtras.h" | |||
17 | #include "llvm/ADT/SmallVector.h" | |||
18 | #include "llvm/ADT/Statistic.h" | |||
19 | #include "llvm/ADT/StringRef.h" | |||
20 | #include "llvm/Analysis/DependenceAnalysis.h" | |||
21 | #include "llvm/Analysis/LoopInfo.h" | |||
22 | #include "llvm/Analysis/LoopNestAnalysis.h" | |||
23 | #include "llvm/Analysis/LoopPass.h" | |||
24 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
25 | #include "llvm/Analysis/ScalarEvolution.h" | |||
26 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | |||
27 | #include "llvm/IR/BasicBlock.h" | |||
28 | #include "llvm/IR/Constants.h" | |||
29 | #include "llvm/IR/DiagnosticInfo.h" | |||
30 | #include "llvm/IR/Dominators.h" | |||
31 | #include "llvm/IR/Function.h" | |||
32 | #include "llvm/IR/IRBuilder.h" | |||
33 | #include "llvm/IR/InstrTypes.h" | |||
34 | #include "llvm/IR/Instruction.h" | |||
35 | #include "llvm/IR/Instructions.h" | |||
36 | #include "llvm/IR/Type.h" | |||
37 | #include "llvm/IR/User.h" | |||
38 | #include "llvm/IR/Value.h" | |||
39 | #include "llvm/InitializePasses.h" | |||
40 | #include "llvm/Pass.h" | |||
41 | #include "llvm/Support/Casting.h" | |||
42 | #include "llvm/Support/CommandLine.h" | |||
43 | #include "llvm/Support/Debug.h" | |||
44 | #include "llvm/Support/ErrorHandling.h" | |||
45 | #include "llvm/Support/raw_ostream.h" | |||
46 | #include "llvm/Transforms/Scalar.h" | |||
47 | #include "llvm/Transforms/Utils.h" | |||
48 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
49 | #include "llvm/Transforms/Utils/LoopUtils.h" | |||
50 | #include <cassert> | |||
51 | #include <utility> | |||
52 | #include <vector> | |||
53 | ||||
54 | using namespace llvm; | |||
55 | ||||
56 | #define DEBUG_TYPE"loop-interchange" "loop-interchange" | |||
57 | ||||
58 | STATISTIC(LoopsInterchanged, "Number of loops interchanged")static llvm::Statistic LoopsInterchanged = {"loop-interchange" , "LoopsInterchanged", "Number of loops interchanged"}; | |||
59 | ||||
60 | static cl::opt<int> LoopInterchangeCostThreshold( | |||
61 | "loop-interchange-threshold", cl::init(0), cl::Hidden, | |||
62 | cl::desc("Interchange if you gain more than this number")); | |||
63 | ||||
64 | namespace { | |||
65 | ||||
66 | using LoopVector = SmallVector<Loop *, 8>; | |||
67 | ||||
68 | // TODO: Check if we can use a sparse matrix here. | |||
69 | using CharMatrix = std::vector<std::vector<char>>; | |||
70 | ||||
71 | } // end anonymous namespace | |||
72 | ||||
73 | // Maximum number of dependencies that can be handled in the dependency matrix. | |||
74 | static const unsigned MaxMemInstrCount = 100; | |||
75 | ||||
76 | // Maximum loop depth supported. | |||
77 | static const unsigned MaxLoopNestDepth = 10; | |||
78 | ||||
79 | #ifdef DUMP_DEP_MATRICIES | |||
80 | static void printDepMatrix(CharMatrix &DepMatrix) { | |||
81 | for (auto &Row : DepMatrix) { | |||
82 | for (auto D : Row) | |||
83 | LLVM_DEBUG(dbgs() << D << " ")do { } while (false); | |||
84 | LLVM_DEBUG(dbgs() << "\n")do { } while (false); | |||
85 | } | |||
86 | } | |||
87 | #endif | |||
88 | ||||
89 | static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, | |||
90 | Loop *L, DependenceInfo *DI) { | |||
91 | using ValueVector = SmallVector<Value *, 16>; | |||
92 | ||||
93 | ValueVector MemInstr; | |||
94 | ||||
95 | // For each block. | |||
96 | for (BasicBlock *BB : L->blocks()) { | |||
97 | // Scan the BB and collect legal loads and stores. | |||
98 | for (Instruction &I : *BB) { | |||
99 | if (!isa<Instruction>(I)) | |||
100 | return false; | |||
101 | if (auto *Ld = dyn_cast<LoadInst>(&I)) { | |||
102 | if (!Ld->isSimple()) | |||
103 | return false; | |||
104 | MemInstr.push_back(&I); | |||
105 | } else if (auto *St = dyn_cast<StoreInst>(&I)) { | |||
106 | if (!St->isSimple()) | |||
107 | return false; | |||
108 | MemInstr.push_back(&I); | |||
109 | } | |||
110 | } | |||
111 | } | |||
112 | ||||
113 | LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()do { } while (false) | |||
114 | << " Loads and Stores to analyze\n")do { } while (false); | |||
115 | ||||
116 | ValueVector::iterator I, IE, J, JE; | |||
117 | ||||
118 | for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { | |||
119 | for (J = I, JE = MemInstr.end(); J != JE; ++J) { | |||
120 | std::vector<char> Dep; | |||
121 | Instruction *Src = cast<Instruction>(*I); | |||
122 | Instruction *Dst = cast<Instruction>(*J); | |||
123 | if (Src == Dst) | |||
124 | continue; | |||
125 | // Ignore Input dependencies. | |||
126 | if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) | |||
127 | continue; | |||
128 | // Track Output, Flow, and Anti dependencies. | |||
129 | if (auto D = DI->depends(Src, Dst, true)) { | |||
130 | assert(D->isOrdered() && "Expected an output, flow or anti dep.")((void)0); | |||
131 | LLVM_DEBUG(StringRef DepType =do { } while (false) | |||
132 | D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";do { } while (false) | |||
133 | dbgs() << "Found " << DepTypedo { } while (false) | |||
134 | << " dependency between Src and Dst\n"do { } while (false) | |||
135 | << " Src:" << *Src << "\n Dst:" << *Dst << '\n')do { } while (false); | |||
136 | unsigned Levels = D->getLevels(); | |||
137 | char Direction; | |||
138 | for (unsigned II = 1; II <= Levels; ++II) { | |||
139 | const SCEV *Distance = D->getDistance(II); | |||
140 | const SCEVConstant *SCEVConst = | |||
141 | dyn_cast_or_null<SCEVConstant>(Distance); | |||
142 | if (SCEVConst) { | |||
143 | const ConstantInt *CI = SCEVConst->getValue(); | |||
144 | if (CI->isNegative()) | |||
145 | Direction = '<'; | |||
146 | else if (CI->isZero()) | |||
147 | Direction = '='; | |||
148 | else | |||
149 | Direction = '>'; | |||
150 | Dep.push_back(Direction); | |||
151 | } else if (D->isScalar(II)) { | |||
152 | Direction = 'S'; | |||
153 | Dep.push_back(Direction); | |||
154 | } else { | |||
155 | unsigned Dir = D->getDirection(II); | |||
156 | if (Dir == Dependence::DVEntry::LT || | |||
157 | Dir == Dependence::DVEntry::LE) | |||
158 | Direction = '<'; | |||
159 | else if (Dir == Dependence::DVEntry::GT || | |||
160 | Dir == Dependence::DVEntry::GE) | |||
161 | Direction = '>'; | |||
162 | else if (Dir == Dependence::DVEntry::EQ) | |||
163 | Direction = '='; | |||
164 | else | |||
165 | Direction = '*'; | |||
166 | Dep.push_back(Direction); | |||
167 | } | |||
168 | } | |||
169 | while (Dep.size() != Level) { | |||
170 | Dep.push_back('I'); | |||
171 | } | |||
172 | ||||
173 | DepMatrix.push_back(Dep); | |||
174 | if (DepMatrix.size() > MaxMemInstrCount) { | |||
175 | LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCountdo { } while (false) | |||
176 | << " dependencies inside loop\n")do { } while (false); | |||
177 | return false; | |||
178 | } | |||
179 | } | |||
180 | } | |||
181 | } | |||
182 | ||||
183 | return true; | |||
184 | } | |||
185 | ||||
186 | // A loop is moved from index 'from' to an index 'to'. Update the Dependence | |||
187 | // matrix by exchanging the two columns. | |||
188 | static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, | |||
189 | unsigned ToIndx) { | |||
190 | for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I) | |||
191 | std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]); | |||
192 | } | |||
193 | ||||
194 | // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is | |||
195 | // '>' | |||
196 | static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, | |||
197 | unsigned Column) { | |||
198 | for (unsigned i = 0; i <= Column; ++i) { | |||
199 | if (DepMatrix[Row][i] == '<') | |||
200 | return false; | |||
201 | if (DepMatrix[Row][i] == '>') | |||
202 | return true; | |||
203 | } | |||
204 | // All dependencies were '=','S' or 'I' | |||
205 | return false; | |||
206 | } | |||
207 | ||||
208 | // Checks if no dependence exist in the dependency matrix in Row before Column. | |||
209 | static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, | |||
210 | unsigned Column) { | |||
211 | for (unsigned i = 0; i < Column; ++i) { | |||
212 | if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' && | |||
213 | DepMatrix[Row][i] != 'I') | |||
214 | return false; | |||
215 | } | |||
216 | return true; | |||
217 | } | |||
218 | ||||
219 | static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, | |||
220 | unsigned OuterLoopId, char InnerDep, | |||
221 | char OuterDep) { | |||
222 | if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) | |||
223 | return false; | |||
224 | ||||
225 | if (InnerDep == OuterDep) | |||
226 | return true; | |||
227 | ||||
228 | // It is legal to interchange if and only if after interchange no row has a | |||
229 | // '>' direction as the leftmost non-'='. | |||
230 | ||||
231 | if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') | |||
232 | return true; | |||
233 | ||||
234 | if (InnerDep == '<') | |||
235 | return true; | |||
236 | ||||
237 | if (InnerDep == '>') { | |||
238 | // If OuterLoopId represents outermost loop then interchanging will make the | |||
239 | // 1st dependency as '>' | |||
240 | if (OuterLoopId == 0) | |||
241 | return false; | |||
242 | ||||
243 | // If all dependencies before OuterloopId are '=','S'or 'I'. Then | |||
244 | // interchanging will result in this row having an outermost non '=' | |||
245 | // dependency of '>' | |||
246 | if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) | |||
247 | return true; | |||
248 | } | |||
249 | ||||
250 | return false; | |||
251 | } | |||
252 | ||||
253 | // Checks if it is legal to interchange 2 loops. | |||
254 | // [Theorem] A permutation of the loops in a perfect nest is legal if and only | |||
255 | // if the direction matrix, after the same permutation is applied to its | |||
256 | // columns, has no ">" direction as the leftmost non-"=" direction in any row. | |||
257 | static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, | |||
258 | unsigned InnerLoopId, | |||
259 | unsigned OuterLoopId) { | |||
260 | unsigned NumRows = DepMatrix.size(); | |||
261 | // For each row check if it is valid to interchange. | |||
262 | for (unsigned Row = 0; Row < NumRows; ++Row) { | |||
263 | char InnerDep = DepMatrix[Row][InnerLoopId]; | |||
264 | char OuterDep = DepMatrix[Row][OuterLoopId]; | |||
265 | if (InnerDep == '*' || OuterDep == '*') | |||
266 | return false; | |||
267 | if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep)) | |||
268 | return false; | |||
269 | } | |||
270 | return true; | |||
271 | } | |||
272 | ||||
273 | static LoopVector populateWorklist(Loop &L) { | |||
274 | LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "do { } while (false) | |||
275 | << L.getHeader()->getParent()->getName() << " Loop: %"do { } while (false) | |||
276 | << L.getHeader()->getName() << '\n')do { } while (false); | |||
277 | LoopVector LoopList; | |||
278 | Loop *CurrentLoop = &L; | |||
279 | const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); | |||
280 | while (!Vec->empty()) { | |||
281 | // The current loop has multiple subloops in it hence it is not tightly | |||
282 | // nested. | |||
283 | // Discard all loops above it added into Worklist. | |||
284 | if (Vec->size() != 1) | |||
285 | return {}; | |||
286 | ||||
287 | LoopList.push_back(CurrentLoop); | |||
288 | CurrentLoop = Vec->front(); | |||
289 | Vec = &CurrentLoop->getSubLoops(); | |||
290 | } | |||
291 | LoopList.push_back(CurrentLoop); | |||
292 | return LoopList; | |||
293 | } | |||
294 | ||||
295 | static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { | |||
296 | PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); | |||
297 | if (InnerIndexVar) | |||
298 | return InnerIndexVar; | |||
299 | if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) | |||
300 | return nullptr; | |||
301 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { | |||
302 | PHINode *PhiVar = cast<PHINode>(I); | |||
303 | Type *PhiTy = PhiVar->getType(); | |||
304 | if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && | |||
305 | !PhiTy->isPointerTy()) | |||
306 | return nullptr; | |||
307 | const SCEVAddRecExpr *AddRec = | |||
308 | dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); | |||
309 | if (!AddRec || !AddRec->isAffine()) | |||
310 | continue; | |||
311 | const SCEV *Step = AddRec->getStepRecurrence(*SE); | |||
312 | if (!isa<SCEVConstant>(Step)) | |||
313 | continue; | |||
314 | // Found the induction variable. | |||
315 | // FIXME: Handle loops with more than one induction variable. Note that, | |||
316 | // currently, legality makes sure we have only one induction variable. | |||
317 | return PhiVar; | |||
318 | } | |||
319 | return nullptr; | |||
320 | } | |||
321 | ||||
322 | namespace { | |||
323 | ||||
324 | /// LoopInterchangeLegality checks if it is legal to interchange the loop. | |||
325 | class LoopInterchangeLegality { | |||
326 | public: | |||
327 | LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, | |||
328 | OptimizationRemarkEmitter *ORE) | |||
329 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} | |||
330 | ||||
331 | /// Check if the loops can be interchanged. | |||
332 | bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, | |||
333 | CharMatrix &DepMatrix); | |||
334 | ||||
335 | /// Check if the loop structure is understood. We do not handle triangular | |||
336 | /// loops for now. | |||
337 | bool isLoopStructureUnderstood(PHINode *InnerInductionVar); | |||
338 | ||||
339 | bool currentLimitations(); | |||
340 | ||||
341 | const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const { | |||
342 | return OuterInnerReductions; | |||
343 | } | |||
344 | ||||
345 | private: | |||
346 | bool tightlyNested(Loop *Outer, Loop *Inner); | |||
347 | bool containsUnsafeInstructions(BasicBlock *BB); | |||
348 | ||||
349 | /// Discover induction and reduction PHIs in the header of \p L. Induction | |||
350 | /// PHIs are added to \p Inductions, reductions are added to | |||
351 | /// OuterInnerReductions. When the outer loop is passed, the inner loop needs | |||
352 | /// to be passed as \p InnerLoop. | |||
353 | bool findInductionAndReductions(Loop *L, | |||
354 | SmallVector<PHINode *, 8> &Inductions, | |||
355 | Loop *InnerLoop); | |||
356 | ||||
357 | Loop *OuterLoop; | |||
358 | Loop *InnerLoop; | |||
359 | ||||
360 | ScalarEvolution *SE; | |||
361 | ||||
362 | /// Interface to emit optimization remarks. | |||
363 | OptimizationRemarkEmitter *ORE; | |||
364 | ||||
365 | /// Set of reduction PHIs taking part of a reduction across the inner and | |||
366 | /// outer loop. | |||
367 | SmallPtrSet<PHINode *, 4> OuterInnerReductions; | |||
368 | }; | |||
369 | ||||
370 | /// LoopInterchangeProfitability checks if it is profitable to interchange the | |||
371 | /// loop. | |||
372 | class LoopInterchangeProfitability { | |||
373 | public: | |||
374 | LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, | |||
375 | OptimizationRemarkEmitter *ORE) | |||
376 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} | |||
377 | ||||
378 | /// Check if the loop interchange is profitable. | |||
379 | bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, | |||
380 | CharMatrix &DepMatrix); | |||
381 | ||||
382 | private: | |||
383 | int getInstrOrderCost(); | |||
384 | ||||
385 | Loop *OuterLoop; | |||
386 | Loop *InnerLoop; | |||
387 | ||||
388 | /// Scev analysis. | |||
389 | ScalarEvolution *SE; | |||
390 | ||||
391 | /// Interface to emit optimization remarks. | |||
392 | OptimizationRemarkEmitter *ORE; | |||
393 | }; | |||
394 | ||||
395 | /// LoopInterchangeTransform interchanges the loop. | |||
396 | class LoopInterchangeTransform { | |||
397 | public: | |||
398 | LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, | |||
399 | LoopInfo *LI, DominatorTree *DT, | |||
400 | const LoopInterchangeLegality &LIL) | |||
401 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {} | |||
402 | ||||
403 | /// Interchange OuterLoop and InnerLoop. | |||
404 | bool transform(); | |||
405 | void restructureLoops(Loop *NewInner, Loop *NewOuter, | |||
406 | BasicBlock *OrigInnerPreHeader, | |||
407 | BasicBlock *OrigOuterPreHeader); | |||
408 | void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); | |||
409 | ||||
410 | private: | |||
411 | bool adjustLoopLinks(); | |||
412 | bool adjustLoopBranches(); | |||
413 | ||||
414 | Loop *OuterLoop; | |||
415 | Loop *InnerLoop; | |||
416 | ||||
417 | /// Scev analysis. | |||
418 | ScalarEvolution *SE; | |||
419 | ||||
420 | LoopInfo *LI; | |||
421 | DominatorTree *DT; | |||
422 | ||||
423 | const LoopInterchangeLegality &LIL; | |||
424 | }; | |||
425 | ||||
426 | struct LoopInterchange { | |||
427 | ScalarEvolution *SE = nullptr; | |||
428 | LoopInfo *LI = nullptr; | |||
429 | DependenceInfo *DI = nullptr; | |||
430 | DominatorTree *DT = nullptr; | |||
431 | ||||
432 | /// Interface to emit optimization remarks. | |||
433 | OptimizationRemarkEmitter *ORE; | |||
434 | ||||
435 | LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, | |||
436 | DominatorTree *DT, OptimizationRemarkEmitter *ORE) | |||
437 | : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {} | |||
438 | ||||
439 | bool run(Loop *L) { | |||
440 | if (L->getParentLoop()) | |||
441 | return false; | |||
442 | ||||
443 | return processLoopList(populateWorklist(*L)); | |||
444 | } | |||
445 | ||||
446 | bool run(LoopNest &LN) { | |||
447 | const auto &LoopList = LN.getLoops(); | |||
448 | for (unsigned I = 1; I < LoopList.size(); ++I) | |||
449 | if (LoopList[I]->getParentLoop() != LoopList[I - 1]) | |||
450 | return false; | |||
451 | return processLoopList(LoopList); | |||
452 | } | |||
453 | ||||
454 | bool isComputableLoopNest(ArrayRef<Loop *> LoopList) { | |||
455 | for (Loop *L : LoopList) { | |||
456 | const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); | |||
457 | if (isa<SCEVCouldNotCompute>(ExitCountOuter)) { | |||
458 | LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n")do { } while (false); | |||
459 | return false; | |||
460 | } | |||
461 | if (L->getNumBackEdges() != 1) { | |||
462 | LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n")do { } while (false); | |||
463 | return false; | |||
464 | } | |||
465 | if (!L->getExitingBlock()) { | |||
466 | LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n")do { } while (false); | |||
467 | return false; | |||
468 | } | |||
469 | } | |||
470 | return true; | |||
471 | } | |||
472 | ||||
473 | unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) { | |||
474 | // TODO: Add a better heuristic to select the loop to be interchanged based | |||
475 | // on the dependence matrix. Currently we select the innermost loop. | |||
476 | return LoopList.size() - 1; | |||
477 | } | |||
478 | ||||
479 | bool processLoopList(ArrayRef<Loop *> LoopList) { | |||
480 | bool Changed = false; | |||
481 | unsigned LoopNestDepth = LoopList.size(); | |||
482 | if (LoopNestDepth < 2) { | |||
483 | LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n")do { } while (false); | |||
484 | return false; | |||
485 | } | |||
486 | if (LoopNestDepth > MaxLoopNestDepth) { | |||
487 | LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "do { } while (false) | |||
488 | << MaxLoopNestDepth << "\n")do { } while (false); | |||
489 | return false; | |||
490 | } | |||
491 | if (!isComputableLoopNest(LoopList)) { | |||
492 | LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n")do { } while (false); | |||
493 | return false; | |||
494 | } | |||
495 | ||||
496 | LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepthdo { } while (false) | |||
497 | << "\n")do { } while (false); | |||
498 | ||||
499 | CharMatrix DependencyMatrix; | |||
500 | Loop *OuterMostLoop = *(LoopList.begin()); | |||
501 | if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth, | |||
502 | OuterMostLoop, DI)) { | |||
503 | LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n")do { } while (false); | |||
504 | return false; | |||
505 | } | |||
506 | #ifdef DUMP_DEP_MATRICIES | |||
507 | LLVM_DEBUG(dbgs() << "Dependence before interchange\n")do { } while (false); | |||
508 | printDepMatrix(DependencyMatrix); | |||
509 | #endif | |||
510 | ||||
511 | // Get the Outermost loop exit. | |||
512 | BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock(); | |||
513 | if (!LoopNestExit) { | |||
514 | LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block")do { } while (false); | |||
515 | return false; | |||
516 | } | |||
517 | ||||
518 | unsigned SelecLoopId = selectLoopForInterchange(LoopList); | |||
519 | // Move the selected loop outwards to the best possible position. | |||
520 | Loop *LoopToBeInterchanged = LoopList[SelecLoopId]; | |||
521 | for (unsigned i = SelecLoopId; i > 0; i--) { | |||
522 | bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i, | |||
523 | i - 1, DependencyMatrix); | |||
524 | if (!Interchanged) | |||
525 | return Changed; | |||
526 | // Update the DependencyMatrix | |||
527 | interChangeDependencies(DependencyMatrix, i, i - 1); | |||
528 | #ifdef DUMP_DEP_MATRICIES | |||
529 | LLVM_DEBUG(dbgs() << "Dependence after interchange\n")do { } while (false); | |||
530 | printDepMatrix(DependencyMatrix); | |||
531 | #endif | |||
532 | Changed |= Interchanged; | |||
533 | } | |||
534 | return Changed; | |||
535 | } | |||
536 | ||||
537 | bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, | |||
538 | unsigned OuterLoopId, | |||
539 | std::vector<std::vector<char>> &DependencyMatrix) { | |||
540 | LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopIddo { } while (false) | |||
541 | << " and OuterLoopId = " << OuterLoopId << "\n")do { } while (false); | |||
542 | LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); | |||
543 | if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { | |||
544 | LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n")do { } while (false); | |||
545 | return false; | |||
546 | } | |||
547 | LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n")do { } while (false); | |||
548 | LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); | |||
549 | if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { | |||
550 | LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n")do { } while (false); | |||
551 | return false; | |||
552 | } | |||
553 | ||||
554 | ORE->emit([&]() { | |||
555 | return OptimizationRemark(DEBUG_TYPE"loop-interchange", "Interchanged", | |||
556 | InnerLoop->getStartLoc(), | |||
557 | InnerLoop->getHeader()) | |||
558 | << "Loop interchanged with enclosing loop."; | |||
559 | }); | |||
560 | ||||
561 | LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL); | |||
562 | LIT.transform(); | |||
563 | LLVM_DEBUG(dbgs() << "Loops interchanged.\n")do { } while (false); | |||
564 | LoopsInterchanged++; | |||
565 | ||||
566 | assert(InnerLoop->isLCSSAForm(*DT) &&((void)0) | |||
567 | "Inner loop not left in LCSSA form after loop interchange!")((void)0); | |||
568 | assert(OuterLoop->isLCSSAForm(*DT) &&((void)0) | |||
569 | "Outer loop not left in LCSSA form after loop interchange!")((void)0); | |||
570 | ||||
571 | return true; | |||
572 | } | |||
573 | }; | |||
574 | ||||
575 | } // end anonymous namespace | |||
576 | ||||
577 | bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) { | |||
578 | return any_of(*BB, [](const Instruction &I) { | |||
579 | return I.mayHaveSideEffects() || I.mayReadFromMemory(); | |||
580 | }); | |||
581 | } | |||
582 | ||||
583 | bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { | |||
584 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); | |||
585 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | |||
586 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); | |||
587 | ||||
588 | LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n")do { } while (false); | |||
589 | ||||
590 | // A perfectly nested loop will not have any branch in between the outer and | |||
591 | // inner block i.e. outer header will branch to either inner preheader and | |||
592 | // outerloop latch. | |||
593 | BranchInst *OuterLoopHeaderBI = | |||
594 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); | |||
595 | if (!OuterLoopHeaderBI) | |||
596 | return false; | |||
597 | ||||
598 | for (BasicBlock *Succ : successors(OuterLoopHeaderBI)) | |||
599 | if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() && | |||
600 | Succ != OuterLoopLatch) | |||
601 | return false; | |||
602 | ||||
603 | LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n")do { } while (false); | |||
604 | // We do not have any basic block in between now make sure the outer header | |||
605 | // and outer loop latch doesn't contain any unsafe instructions. | |||
606 | if (containsUnsafeInstructions(OuterLoopHeader) || | |||
607 | containsUnsafeInstructions(OuterLoopLatch)) | |||
608 | return false; | |||
609 | ||||
610 | // Also make sure the inner loop preheader does not contain any unsafe | |||
611 | // instructions. Note that all instructions in the preheader will be moved to | |||
612 | // the outer loop header when interchanging. | |||
613 | if (InnerLoopPreHeader != OuterLoopHeader && | |||
614 | containsUnsafeInstructions(InnerLoopPreHeader)) | |||
615 | return false; | |||
616 | ||||
617 | BasicBlock *InnerLoopExit = InnerLoop->getExitBlock(); | |||
618 | // Ensure the inner loop exit block flows to the outer loop latch possibly | |||
619 | // through empty blocks. | |||
620 | const BasicBlock &SuccInner = | |||
621 | LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch); | |||
622 | if (&SuccInner != OuterLoopLatch) { | |||
623 | LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExitdo { } while (false) | |||
624 | << " does not lead to the outer loop latch.\n";)do { } while (false); | |||
625 | return false; | |||
626 | } | |||
627 | // The inner loop exit block does flow to the outer loop latch and not some | |||
628 | // other BBs, now make sure it contains safe instructions, since it will be | |||
629 | // moved into the (new) inner loop after interchange. | |||
630 | if (containsUnsafeInstructions(InnerLoopExit)) | |||
631 | return false; | |||
632 | ||||
633 | LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n")do { } while (false); | |||
634 | // We have a perfect loop nest. | |||
635 | return true; | |||
636 | } | |||
637 | ||||
638 | bool LoopInterchangeLegality::isLoopStructureUnderstood( | |||
639 | PHINode *InnerInduction) { | |||
640 | unsigned Num = InnerInduction->getNumOperands(); | |||
641 | BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); | |||
642 | for (unsigned i = 0; i < Num; ++i) { | |||
643 | Value *Val = InnerInduction->getOperand(i); | |||
644 | if (isa<Constant>(Val)) | |||
645 | continue; | |||
646 | Instruction *I = dyn_cast<Instruction>(Val); | |||
647 | if (!I) | |||
648 | return false; | |||
649 | // TODO: Handle triangular loops. | |||
650 | // e.g. for(int i=0;i<N;i++) | |||
651 | // for(int j=i;j<N;j++) | |||
652 | unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); | |||
653 | if (InnerInduction->getIncomingBlock(IncomBlockIndx) == | |||
654 | InnerLoopPreheader && | |||
655 | !OuterLoop->isLoopInvariant(I)) { | |||
656 | return false; | |||
657 | } | |||
658 | } | |||
659 | ||||
660 | // TODO: Handle triangular loops of another form. | |||
661 | // e.g. for(int i=0;i<N;i++) | |||
662 | // for(int j=0;j<i;j++) | |||
663 | // or, | |||
664 | // for(int i=0;i<N;i++) | |||
665 | // for(int j=0;j*i<N;j++) | |||
666 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); | |||
667 | BranchInst *InnerLoopLatchBI = | |||
668 | dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); | |||
669 | if (!InnerLoopLatchBI->isConditional()) | |||
670 | return false; | |||
671 | if (CmpInst *InnerLoopCmp = | |||
672 | dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) { | |||
673 | Value *Op0 = InnerLoopCmp->getOperand(0); | |||
674 | Value *Op1 = InnerLoopCmp->getOperand(1); | |||
675 | ||||
676 | // LHS and RHS of the inner loop exit condition, e.g., | |||
677 | // in "for(int j=0;j<i;j++)", LHS is j and RHS is i. | |||
678 | Value *Left = nullptr; | |||
679 | Value *Right = nullptr; | |||
680 | ||||
681 | // Check if V only involves inner loop induction variable. | |||
682 | // Return true if V is InnerInduction, or a cast from | |||
683 | // InnerInduction, or a binary operator that involves | |||
684 | // InnerInduction and a constant. | |||
685 | std::function<bool(Value *)> IsPathToIndVar; | |||
686 | IsPathToIndVar = [&InnerInduction, &IsPathToIndVar](Value *V) -> bool { | |||
687 | if (V == InnerInduction) | |||
688 | return true; | |||
689 | if (isa<Constant>(V)) | |||
690 | return true; | |||
691 | Instruction *I = dyn_cast<Instruction>(V); | |||
692 | if (!I) | |||
693 | return false; | |||
694 | if (isa<CastInst>(I)) | |||
695 | return IsPathToIndVar(I->getOperand(0)); | |||
696 | if (isa<BinaryOperator>(I)) | |||
697 | return IsPathToIndVar(I->getOperand(0)) && | |||
698 | IsPathToIndVar(I->getOperand(1)); | |||
699 | return false; | |||
700 | }; | |||
701 | ||||
702 | if (IsPathToIndVar(Op0) && !isa<Constant>(Op0)) { | |||
703 | Left = Op0; | |||
704 | Right = Op1; | |||
705 | } else if (IsPathToIndVar(Op1) && !isa<Constant>(Op1)) { | |||
706 | Left = Op1; | |||
707 | Right = Op0; | |||
708 | } | |||
709 | ||||
710 | if (Left == nullptr) | |||
711 | return false; | |||
712 | ||||
713 | const SCEV *S = SE->getSCEV(Right); | |||
714 | if (!SE->isLoopInvariant(S, OuterLoop)) | |||
715 | return false; | |||
716 | } | |||
717 | ||||
718 | return true; | |||
719 | } | |||
720 | ||||
721 | // If SV is a LCSSA PHI node with a single incoming value, return the incoming | |||
722 | // value. | |||
723 | static Value *followLCSSA(Value *SV) { | |||
724 | PHINode *PHI = dyn_cast<PHINode>(SV); | |||
725 | if (!PHI) | |||
726 | return SV; | |||
727 | ||||
728 | if (PHI->getNumIncomingValues() != 1) | |||
729 | return SV; | |||
730 | return followLCSSA(PHI->getIncomingValue(0)); | |||
731 | } | |||
732 | ||||
733 | // Check V's users to see if it is involved in a reduction in L. | |||
734 | static PHINode *findInnerReductionPhi(Loop *L, Value *V) { | |||
735 | // Reduction variables cannot be constants. | |||
736 | if (isa<Constant>(V)) | |||
737 | return nullptr; | |||
738 | ||||
739 | for (Value *User : V->users()) { | |||
740 | if (PHINode *PHI = dyn_cast<PHINode>(User)) { | |||
741 | if (PHI->getNumIncomingValues() == 1) | |||
742 | continue; | |||
743 | RecurrenceDescriptor RD; | |||
744 | if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) | |||
745 | return PHI; | |||
746 | return nullptr; | |||
747 | } | |||
748 | } | |||
749 | ||||
750 | return nullptr; | |||
751 | } | |||
752 | ||||
753 | bool LoopInterchangeLegality::findInductionAndReductions( | |||
754 | Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) { | |||
755 | if (!L->getLoopLatch() || !L->getLoopPredecessor()) | |||
756 | return false; | |||
757 | for (PHINode &PHI : L->getHeader()->phis()) { | |||
758 | RecurrenceDescriptor RD; | |||
759 | InductionDescriptor ID; | |||
760 | if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) | |||
761 | Inductions.push_back(&PHI); | |||
762 | else { | |||
763 | // PHIs in inner loops need to be part of a reduction in the outer loop, | |||
764 | // discovered when checking the PHIs of the outer loop earlier. | |||
765 | if (!InnerLoop) { | |||
766 | if (!OuterInnerReductions.count(&PHI)) { | |||
767 | LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "do { } while (false) | |||
768 | "across the outer loop.\n")do { } while (false); | |||
769 | return false; | |||
770 | } | |||
771 | } else { | |||
772 | assert(PHI.getNumIncomingValues() == 2 &&((void)0) | |||
773 | "Phis in loop header should have exactly 2 incoming values")((void)0); | |||
774 | // Check if we have a PHI node in the outer loop that has a reduction | |||
775 | // result from the inner loop as an incoming value. | |||
776 | Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch())); | |||
777 | PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V); | |||
778 | if (!InnerRedPhi || | |||
779 | !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) { | |||
780 | LLVM_DEBUG(do { } while (false) | |||
781 | dbgs()do { } while (false) | |||
782 | << "Failed to recognize PHI as an induction or reduction.\n")do { } while (false); | |||
783 | return false; | |||
784 | } | |||
785 | OuterInnerReductions.insert(&PHI); | |||
786 | OuterInnerReductions.insert(InnerRedPhi); | |||
787 | } | |||
788 | } | |||
789 | } | |||
790 | return true; | |||
791 | } | |||
792 | ||||
793 | // This function indicates the current limitations in the transform as a result | |||
794 | // of which we do not proceed. | |||
795 | bool LoopInterchangeLegality::currentLimitations() { | |||
796 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | |||
797 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); | |||
798 | ||||
799 | // transform currently expects the loop latches to also be the exiting | |||
800 | // blocks. | |||
801 | if (InnerLoop->getExitingBlock() != InnerLoopLatch || | |||
802 | OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() || | |||
803 | !isa<BranchInst>(InnerLoopLatch->getTerminator()) || | |||
804 | !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) { | |||
805 | LLVM_DEBUG(do { } while (false) | |||
806 | dbgs() << "Loops where the latch is not the exiting block are not"do { } while (false) | |||
807 | << " supported currently.\n")do { } while (false); | |||
808 | ORE->emit([&]() { | |||
809 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "ExitingNotLatch", | |||
810 | OuterLoop->getStartLoc(), | |||
811 | OuterLoop->getHeader()) | |||
812 | << "Loops where the latch is not the exiting block cannot be" | |||
813 | " interchange currently."; | |||
814 | }); | |||
815 | return true; | |||
816 | } | |||
817 | ||||
818 | PHINode *InnerInductionVar; | |||
819 | SmallVector<PHINode *, 8> Inductions; | |||
820 | if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) { | |||
821 | LLVM_DEBUG(do { } while (false) | |||
822 | dbgs() << "Only outer loops with induction or reduction PHI nodes "do { } while (false) | |||
823 | << "are supported currently.\n")do { } while (false); | |||
824 | ORE->emit([&]() { | |||
825 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIOuter", | |||
826 | OuterLoop->getStartLoc(), | |||
827 | OuterLoop->getHeader()) | |||
828 | << "Only outer loops with induction or reduction PHI nodes can be" | |||
829 | " interchanged currently."; | |||
830 | }); | |||
831 | return true; | |||
832 | } | |||
833 | ||||
834 | // TODO: Currently we handle only loops with 1 induction variable. | |||
835 | if (Inductions.size() != 1) { | |||
836 | LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "do { } while (false) | |||
837 | << "supported currently.\n")do { } while (false); | |||
838 | ORE->emit([&]() { | |||
839 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiIndutionOuter", | |||
840 | OuterLoop->getStartLoc(), | |||
841 | OuterLoop->getHeader()) | |||
842 | << "Only outer loops with 1 induction variable can be " | |||
843 | "interchanged currently."; | |||
844 | }); | |||
845 | return true; | |||
846 | } | |||
847 | ||||
848 | Inductions.clear(); | |||
849 | if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) { | |||
850 | LLVM_DEBUG(do { } while (false) | |||
851 | dbgs() << "Only inner loops with induction or reduction PHI nodes "do { } while (false) | |||
852 | << "are supported currently.\n")do { } while (false); | |||
853 | ORE->emit([&]() { | |||
854 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedPHIInner", | |||
855 | InnerLoop->getStartLoc(), | |||
856 | InnerLoop->getHeader()) | |||
857 | << "Only inner loops with induction or reduction PHI nodes can be" | |||
858 | " interchange currently."; | |||
859 | }); | |||
860 | return true; | |||
861 | } | |||
862 | ||||
863 | // TODO: Currently we handle only loops with 1 induction variable. | |||
864 | if (Inductions.size() != 1) { | |||
865 | LLVM_DEBUG(do { } while (false) | |||
866 | dbgs() << "We currently only support loops with 1 induction variable."do { } while (false) | |||
867 | << "Failed to interchange due to current limitation\n")do { } while (false); | |||
868 | ORE->emit([&]() { | |||
869 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "MultiInductionInner", | |||
870 | InnerLoop->getStartLoc(), | |||
871 | InnerLoop->getHeader()) | |||
872 | << "Only inner loops with 1 induction variable can be " | |||
873 | "interchanged currently."; | |||
874 | }); | |||
875 | return true; | |||
876 | } | |||
877 | InnerInductionVar = Inductions.pop_back_val(); | |||
878 | ||||
879 | // TODO: Triangular loops are not handled for now. | |||
880 | if (!isLoopStructureUnderstood(InnerInductionVar)) { | |||
881 | LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n")do { } while (false); | |||
882 | ORE->emit([&]() { | |||
883 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedStructureInner", | |||
884 | InnerLoop->getStartLoc(), | |||
885 | InnerLoop->getHeader()) | |||
886 | << "Inner loop structure not understood currently."; | |||
887 | }); | |||
888 | return true; | |||
889 | } | |||
890 | ||||
891 | // TODO: Current limitation: Since we split the inner loop latch at the point | |||
892 | // were induction variable is incremented (induction.next); We cannot have | |||
893 | // more than 1 user of induction.next since it would result in broken code | |||
894 | // after split. | |||
895 | // e.g. | |||
896 | // for(i=0;i<N;i++) { | |||
897 | // for(j = 0;j<M;j++) { | |||
898 | // A[j+1][i+2] = A[j][i]+k; | |||
899 | // } | |||
900 | // } | |||
901 | Instruction *InnerIndexVarInc = nullptr; | |||
902 | if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) | |||
903 | InnerIndexVarInc = | |||
904 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); | |||
905 | else | |||
906 | InnerIndexVarInc = | |||
907 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); | |||
908 | ||||
909 | if (!InnerIndexVarInc) { | |||
910 | LLVM_DEBUG(do { } while (false) | |||
911 | dbgs() << "Did not find an instruction to increment the induction "do { } while (false) | |||
912 | << "variable.\n")do { } while (false); | |||
913 | ORE->emit([&]() { | |||
914 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIncrementInInner", | |||
915 | InnerLoop->getStartLoc(), | |||
916 | InnerLoop->getHeader()) | |||
917 | << "The inner loop does not increment the induction variable."; | |||
918 | }); | |||
919 | return true; | |||
920 | } | |||
921 | ||||
922 | // Since we split the inner loop latch on this induction variable. Make sure | |||
923 | // we do not have any instruction between the induction variable and branch | |||
924 | // instruction. | |||
925 | ||||
926 | bool FoundInduction = false; | |||
927 | for (const Instruction &I : | |||
928 | llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) { | |||
929 | if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) || | |||
930 | isa<ZExtInst>(I)) | |||
931 | continue; | |||
932 | ||||
933 | // We found an instruction. If this is not induction variable then it is not | |||
934 | // safe to split this loop latch. | |||
935 | if (!I.isIdenticalTo(InnerIndexVarInc)) { | |||
936 | LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "do { } while (false) | |||
937 | << "variable increment and branch.\n")do { } while (false); | |||
938 | ORE->emit([&]() { | |||
939 | return OptimizationRemarkMissed( | |||
940 | DEBUG_TYPE"loop-interchange", "UnsupportedInsBetweenInduction", | |||
941 | InnerLoop->getStartLoc(), InnerLoop->getHeader()) | |||
942 | << "Found unsupported instruction between induction variable " | |||
943 | "increment and branch."; | |||
944 | }); | |||
945 | return true; | |||
946 | } | |||
947 | ||||
948 | FoundInduction = true; | |||
949 | break; | |||
950 | } | |||
951 | // The loop latch ended and we didn't find the induction variable return as | |||
952 | // current limitation. | |||
953 | if (!FoundInduction) { | |||
954 | LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n")do { } while (false); | |||
955 | ORE->emit([&]() { | |||
956 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NoIndutionVariable", | |||
957 | InnerLoop->getStartLoc(), | |||
958 | InnerLoop->getHeader()) | |||
959 | << "Did not find the induction variable."; | |||
960 | }); | |||
961 | return true; | |||
962 | } | |||
963 | return false; | |||
964 | } | |||
965 | ||||
966 | // We currently only support LCSSA PHI nodes in the inner loop exit, if their | |||
967 | // users are either reduction PHIs or PHIs outside the outer loop (which means | |||
968 | // the we are only interested in the final value after the loop). | |||
969 | static bool | |||
970 | areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, | |||
971 | SmallPtrSetImpl<PHINode *> &Reductions) { | |||
972 | BasicBlock *InnerExit = OuterL->getUniqueExitBlock(); | |||
973 | for (PHINode &PHI : InnerExit->phis()) { | |||
974 | // Reduction lcssa phi will have only 1 incoming block that from loop latch. | |||
975 | if (PHI.getNumIncomingValues() > 1) | |||
976 | return false; | |||
977 | if (any_of(PHI.users(), [&Reductions, OuterL](User *U) { | |||
978 | PHINode *PN = dyn_cast<PHINode>(U); | |||
979 | return !PN || | |||
980 | (!Reductions.count(PN) && OuterL->contains(PN->getParent())); | |||
981 | })) { | |||
982 | return false; | |||
983 | } | |||
984 | } | |||
985 | return true; | |||
986 | } | |||
987 | ||||
988 | // We currently support LCSSA PHI nodes in the outer loop exit, if their | |||
989 | // incoming values do not come from the outer loop latch or if the | |||
990 | // outer loop latch has a single predecessor. In that case, the value will | |||
991 | // be available if both the inner and outer loop conditions are true, which | |||
992 | // will still be true after interchanging. If we have multiple predecessor, | |||
993 | // that may not be the case, e.g. because the outer loop latch may be executed | |||
994 | // if the inner loop is not executed. | |||
995 | static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { | |||
996 | BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); | |||
997 | for (PHINode &PHI : LoopNestExit->phis()) { | |||
998 | // FIXME: We currently are not able to detect floating point reductions | |||
999 | // and have to use floating point PHIs as a proxy to prevent | |||
1000 | // interchanging in the presence of floating point reductions. | |||
1001 | if (PHI.getType()->isFloatingPointTy()) | |||
1002 | return false; | |||
1003 | for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { | |||
1004 | Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i)); | |||
1005 | if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) | |||
1006 | continue; | |||
1007 | ||||
1008 | // The incoming value is defined in the outer loop latch. Currently we | |||
1009 | // only support that in case the outer loop latch has a single predecessor. | |||
1010 | // This guarantees that the outer loop latch is executed if and only if | |||
1011 | // the inner loop is executed (because tightlyNested() guarantees that the | |||
1012 | // outer loop header only branches to the inner loop or the outer loop | |||
1013 | // latch). | |||
1014 | // FIXME: We could weaken this logic and allow multiple predecessors, | |||
1015 | // if the values are produced outside the loop latch. We would need | |||
1016 | // additional logic to update the PHI nodes in the exit block as | |||
1017 | // well. | |||
1018 | if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) | |||
1019 | return false; | |||
1020 | } | |||
1021 | } | |||
1022 | return true; | |||
1023 | } | |||
1024 | ||||
1025 | // In case of multi-level nested loops, it may occur that lcssa phis exist in | |||
1026 | // the latch of InnerLoop, i.e., when defs of the incoming values are further | |||
1027 | // inside the loopnest. Sometimes those incoming values are not available | |||
1028 | // after interchange, since the original inner latch will become the new outer | |||
1029 | // latch which may have predecessor paths that do not include those incoming | |||
1030 | // values. | |||
1031 | // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of | |||
1032 | // multi-level loop nests. | |||
1033 | static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { | |||
1034 | if (InnerLoop->getSubLoops().empty()) | |||
1035 | return true; | |||
1036 | // If the original outer latch has only one predecessor, then values defined | |||
1037 | // further inside the looploop, e.g., in the innermost loop, will be available | |||
1038 | // at the new outer latch after interchange. | |||
1039 | if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr) | |||
1040 | return true; | |||
1041 | ||||
1042 | // The outer latch has more than one predecessors, i.e., the inner | |||
1043 | // exit and the inner header. | |||
1044 | // PHI nodes in the inner latch are lcssa phis where the incoming values | |||
1045 | // are defined further inside the loopnest. Check if those phis are used | |||
1046 | // in the original inner latch. If that is the case then bail out since | |||
1047 | // those incoming values may not be available at the new outer latch. | |||
1048 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); | |||
1049 | for (PHINode &PHI : InnerLoopLatch->phis()) { | |||
1050 | for (auto *U : PHI.users()) { | |||
1051 | Instruction *UI = cast<Instruction>(U); | |||
1052 | if (InnerLoopLatch == UI->getParent()) | |||
1053 | return false; | |||
1054 | } | |||
1055 | } | |||
1056 | return true; | |||
1057 | } | |||
1058 | ||||
1059 | bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, | |||
1060 | unsigned OuterLoopId, | |||
1061 | CharMatrix &DepMatrix) { | |||
1062 | if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { | |||
1063 | LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopIddo { } while (false) | |||
1064 | << " and OuterLoopId = " << OuterLoopIddo { } while (false) | |||
1065 | << " due to dependence\n")do { } while (false); | |||
1066 | ORE->emit([&]() { | |||
1067 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "Dependence", | |||
1068 | InnerLoop->getStartLoc(), | |||
1069 | InnerLoop->getHeader()) | |||
1070 | << "Cannot interchange loops due to dependences."; | |||
1071 | }); | |||
1072 | return false; | |||
1073 | } | |||
1074 | // Check if outer and inner loop contain legal instructions only. | |||
1075 | for (auto *BB : OuterLoop->blocks()) | |||
1076 | for (Instruction &I : BB->instructionsWithoutDebug()) | |||
1077 | if (CallInst *CI = dyn_cast<CallInst>(&I)) { | |||
1078 | // readnone functions do not prevent interchanging. | |||
1079 | if (CI->doesNotReadMemory()) | |||
1080 | continue; | |||
1081 | LLVM_DEBUG(do { } while (false) | |||
1082 | dbgs() << "Loops with call instructions cannot be interchanged "do { } while (false) | |||
1083 | << "safely.")do { } while (false); | |||
1084 | ORE->emit([&]() { | |||
1085 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "CallInst", | |||
1086 | CI->getDebugLoc(), | |||
1087 | CI->getParent()) | |||
1088 | << "Cannot interchange loops due to call instruction."; | |||
1089 | }); | |||
1090 | ||||
1091 | return false; | |||
1092 | } | |||
1093 | ||||
1094 | if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) { | |||
1095 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n")do { } while (false); | |||
1096 | ORE->emit([&]() { | |||
1097 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedInnerLatchPHI", | |||
1098 | InnerLoop->getStartLoc(), | |||
1099 | InnerLoop->getHeader()) | |||
1100 | << "Cannot interchange loops because unsupported PHI nodes found " | |||
1101 | "in inner loop latch."; | |||
1102 | }); | |||
1103 | return false; | |||
1104 | } | |||
1105 | ||||
1106 | // TODO: The loops could not be interchanged due to current limitations in the | |||
1107 | // transform module. | |||
1108 | if (currentLimitations()) { | |||
1109 | LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n")do { } while (false); | |||
1110 | return false; | |||
1111 | } | |||
1112 | ||||
1113 | // Check if the loops are tightly nested. | |||
1114 | if (!tightlyNested(OuterLoop, InnerLoop)) { | |||
1115 | LLVM_DEBUG(dbgs() << "Loops not tightly nested\n")do { } while (false); | |||
1116 | ORE->emit([&]() { | |||
1117 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "NotTightlyNested", | |||
1118 | InnerLoop->getStartLoc(), | |||
1119 | InnerLoop->getHeader()) | |||
1120 | << "Cannot interchange loops because they are not tightly " | |||
1121 | "nested."; | |||
1122 | }); | |||
1123 | return false; | |||
1124 | } | |||
1125 | ||||
1126 | if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop, | |||
1127 | OuterInnerReductions)) { | |||
1128 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n")do { } while (false); | |||
1129 | ORE->emit([&]() { | |||
1130 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI", | |||
1131 | InnerLoop->getStartLoc(), | |||
1132 | InnerLoop->getHeader()) | |||
1133 | << "Found unsupported PHI node in loop exit."; | |||
1134 | }); | |||
1135 | return false; | |||
1136 | } | |||
1137 | ||||
1138 | if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) { | |||
1139 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n")do { } while (false); | |||
1140 | ORE->emit([&]() { | |||
1141 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "UnsupportedExitPHI", | |||
1142 | OuterLoop->getStartLoc(), | |||
1143 | OuterLoop->getHeader()) | |||
1144 | << "Found unsupported PHI node in loop exit."; | |||
1145 | }); | |||
1146 | return false; | |||
1147 | } | |||
1148 | ||||
1149 | return true; | |||
1150 | } | |||
1151 | ||||
1152 | int LoopInterchangeProfitability::getInstrOrderCost() { | |||
1153 | unsigned GoodOrder, BadOrder; | |||
1154 | BadOrder = GoodOrder = 0; | |||
1155 | for (BasicBlock *BB : InnerLoop->blocks()) { | |||
1156 | for (Instruction &Ins : *BB) { | |||
1157 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { | |||
1158 | unsigned NumOp = GEP->getNumOperands(); | |||
1159 | bool FoundInnerInduction = false; | |||
1160 | bool FoundOuterInduction = false; | |||
1161 | for (unsigned i = 0; i < NumOp; ++i) { | |||
1162 | // Skip operands that are not SCEV-able. | |||
1163 | if (!SE->isSCEVable(GEP->getOperand(i)->getType())) | |||
1164 | continue; | |||
1165 | ||||
1166 | const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); | |||
1167 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); | |||
1168 | if (!AR) | |||
1169 | continue; | |||
1170 | ||||
1171 | // If we find the inner induction after an outer induction e.g. | |||
1172 | // for(int i=0;i<N;i++) | |||
1173 | // for(int j=0;j<N;j++) | |||
1174 | // A[i][j] = A[i-1][j-1]+k; | |||
1175 | // then it is a good order. | |||
1176 | if (AR->getLoop() == InnerLoop) { | |||
1177 | // We found an InnerLoop induction after OuterLoop induction. It is | |||
1178 | // a good order. | |||
1179 | FoundInnerInduction = true; | |||
1180 | if (FoundOuterInduction) { | |||
1181 | GoodOrder++; | |||
1182 | break; | |||
1183 | } | |||
1184 | } | |||
1185 | // If we find the outer induction after an inner induction e.g. | |||
1186 | // for(int i=0;i<N;i++) | |||
1187 | // for(int j=0;j<N;j++) | |||
1188 | // A[j][i] = A[j-1][i-1]+k; | |||
1189 | // then it is a bad order. | |||
1190 | if (AR->getLoop() == OuterLoop) { | |||
1191 | // We found an OuterLoop induction after InnerLoop induction. It is | |||
1192 | // a bad order. | |||
1193 | FoundOuterInduction = true; | |||
1194 | if (FoundInnerInduction) { | |||
1195 | BadOrder++; | |||
1196 | break; | |||
1197 | } | |||
1198 | } | |||
1199 | } | |||
1200 | } | |||
1201 | } | |||
1202 | } | |||
1203 | return GoodOrder - BadOrder; | |||
1204 | } | |||
1205 | ||||
1206 | static bool isProfitableForVectorization(unsigned InnerLoopId, | |||
1207 | unsigned OuterLoopId, | |||
1208 | CharMatrix &DepMatrix) { | |||
1209 | // TODO: Improve this heuristic to catch more cases. | |||
1210 | // If the inner loop is loop independent or doesn't carry any dependency it is | |||
1211 | // profitable to move this to outer position. | |||
1212 | for (auto &Row : DepMatrix) { | |||
1213 | if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') | |||
1214 | return false; | |||
1215 | // TODO: We need to improve this heuristic. | |||
1216 | if (Row[OuterLoopId] != '=') | |||
1217 | return false; | |||
1218 | } | |||
1219 | // If outer loop has dependence and inner loop is loop independent then it is | |||
1220 | // profitable to interchange to enable parallelism. | |||
1221 | // If there are no dependences, interchanging will not improve anything. | |||
1222 | return !DepMatrix.empty(); | |||
1223 | } | |||
1224 | ||||
1225 | bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, | |||
1226 | unsigned OuterLoopId, | |||
1227 | CharMatrix &DepMatrix) { | |||
1228 | // TODO: Add better profitability checks. | |||
1229 | // e.g | |||
1230 | // 1) Construct dependency matrix and move the one with no loop carried dep | |||
1231 | // inside to enable vectorization. | |||
1232 | ||||
1233 | // This is rough cost estimation algorithm. It counts the good and bad order | |||
1234 | // of induction variables in the instruction and allows reordering if number | |||
1235 | // of bad orders is more than good. | |||
1236 | int Cost = getInstrOrderCost(); | |||
1237 | LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n")do { } while (false); | |||
1238 | if (Cost < -LoopInterchangeCostThreshold) | |||
1239 | return true; | |||
1240 | ||||
1241 | // It is not profitable as per current cache profitability model. But check if | |||
1242 | // we can move this loop outside to improve parallelism. | |||
1243 | if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) | |||
1244 | return true; | |||
1245 | ||||
1246 | ORE->emit([&]() { | |||
1247 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-interchange", "InterchangeNotProfitable", | |||
1248 | InnerLoop->getStartLoc(), | |||
1249 | InnerLoop->getHeader()) | |||
1250 | << "Interchanging loops is too costly (cost=" | |||
1251 | << ore::NV("Cost", Cost) << ", threshold=" | |||
1252 | << ore::NV("Threshold", LoopInterchangeCostThreshold) | |||
1253 | << ") and it does not improve parallelism."; | |||
1254 | }); | |||
1255 | return false; | |||
1256 | } | |||
1257 | ||||
1258 | void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, | |||
1259 | Loop *InnerLoop) { | |||
1260 | for (Loop *L : *OuterLoop) | |||
1261 | if (L == InnerLoop) { | |||
1262 | OuterLoop->removeChildLoop(L); | |||
1263 | return; | |||
1264 | } | |||
1265 | llvm_unreachable("Couldn't find loop")__builtin_unreachable(); | |||
1266 | } | |||
1267 | ||||
1268 | /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the | |||
1269 | /// new inner and outer loop after interchanging: NewInner is the original | |||
1270 | /// outer loop and NewOuter is the original inner loop. | |||
1271 | /// | |||
1272 | /// Before interchanging, we have the following structure | |||
1273 | /// Outer preheader | |||
1274 | // Outer header | |||
1275 | // Inner preheader | |||
1276 | // Inner header | |||
1277 | // Inner body | |||
1278 | // Inner latch | |||
1279 | // outer bbs | |||
1280 | // Outer latch | |||
1281 | // | |||
1282 | // After interchanging: | |||
1283 | // Inner preheader | |||
1284 | // Inner header | |||
1285 | // Outer preheader | |||
1286 | // Outer header | |||
1287 | // Inner body | |||
1288 | // outer bbs | |||
1289 | // Outer latch | |||
1290 | // Inner latch | |||
1291 | void LoopInterchangeTransform::restructureLoops( | |||
1292 | Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader, | |||
1293 | BasicBlock *OrigOuterPreHeader) { | |||
1294 | Loop *OuterLoopParent = OuterLoop->getParentLoop(); | |||
1295 | // The original inner loop preheader moves from the new inner loop to | |||
1296 | // the parent loop, if there is one. | |||
1297 | NewInner->removeBlockFromLoop(OrigInnerPreHeader); | |||
1298 | LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent); | |||
1299 | ||||
1300 | // Switch the loop levels. | |||
1301 | if (OuterLoopParent) { | |||
1302 | // Remove the loop from its parent loop. | |||
1303 | removeChildLoop(OuterLoopParent, NewInner); | |||
1304 | removeChildLoop(NewInner, NewOuter); | |||
1305 | OuterLoopParent->addChildLoop(NewOuter); | |||
1306 | } else { | |||
1307 | removeChildLoop(NewInner, NewOuter); | |||
1308 | LI->changeTopLevelLoop(NewInner, NewOuter); | |||
1309 | } | |||
1310 | while (!NewOuter->isInnermost()) | |||
1311 | NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); | |||
1312 | NewOuter->addChildLoop(NewInner); | |||
1313 | ||||
1314 | // BBs from the original inner loop. | |||
1315 | SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks()); | |||
1316 | ||||
1317 | // Add BBs from the original outer loop to the original inner loop (excluding | |||
1318 | // BBs already in inner loop) | |||
1319 | for (BasicBlock *BB : NewInner->blocks()) | |||
1320 | if (LI->getLoopFor(BB) == NewInner) | |||
1321 | NewOuter->addBlockEntry(BB); | |||
1322 | ||||
1323 | // Now remove inner loop header and latch from the new inner loop and move | |||
1324 | // other BBs (the loop body) to the new inner loop. | |||
1325 | BasicBlock *OuterHeader = NewOuter->getHeader(); | |||
1326 | BasicBlock *OuterLatch = NewOuter->getLoopLatch(); | |||
1327 | for (BasicBlock *BB : OrigInnerBBs) { | |||
1328 | // Nothing will change for BBs in child loops. | |||
1329 | if (LI->getLoopFor(BB) != NewOuter) | |||
1330 | continue; | |||
1331 | // Remove the new outer loop header and latch from the new inner loop. | |||
1332 | if (BB == OuterHeader || BB == OuterLatch) | |||
1333 | NewInner->removeBlockFromLoop(BB); | |||
1334 | else | |||
1335 | LI->changeLoopFor(BB, NewInner); | |||
1336 | } | |||
1337 | ||||
1338 | // The preheader of the original outer loop becomes part of the new | |||
1339 | // outer loop. | |||
1340 | NewOuter->addBlockEntry(OrigOuterPreHeader); | |||
1341 | LI->changeLoopFor(OrigOuterPreHeader, NewOuter); | |||
1342 | ||||
1343 | // Tell SE that we move the loops around. | |||
1344 | SE->forgetLoop(NewOuter); | |||
1345 | SE->forgetLoop(NewInner); | |||
1346 | } | |||
1347 | ||||
1348 | bool LoopInterchangeTransform::transform() { | |||
1349 | bool Transformed = false; | |||
1350 | Instruction *InnerIndexVar; | |||
1351 | ||||
1352 | if (InnerLoop->getSubLoops().empty()) { | |||
1353 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | |||
1354 | LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n")do { } while (false); | |||
1355 | PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); | |||
1356 | if (!InductionPHI) { | |||
1357 | LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n")do { } while (false); | |||
1358 | return false; | |||
1359 | } | |||
1360 | ||||
1361 | if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) | |||
1362 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); | |||
1363 | else | |||
1364 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); | |||
1365 | ||||
1366 | // Ensure that InductionPHI is the first Phi node. | |||
1367 | if (&InductionPHI->getParent()->front() != InductionPHI) | |||
1368 | InductionPHI->moveBefore(&InductionPHI->getParent()->front()); | |||
1369 | ||||
1370 | // Create a new latch block for the inner loop. We split at the | |||
1371 | // current latch's terminator and then move the condition and all | |||
1372 | // operands that are not either loop-invariant or the induction PHI into the | |||
1373 | // new latch block. | |||
1374 | BasicBlock *NewLatch = | |||
1375 | SplitBlock(InnerLoop->getLoopLatch(), | |||
1376 | InnerLoop->getLoopLatch()->getTerminator(), DT, LI); | |||
1377 | ||||
1378 | SmallSetVector<Instruction *, 4> WorkList; | |||
1379 | unsigned i = 0; | |||
1380 | auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() { | |||
1381 | for (; i < WorkList.size(); i++) { | |||
1382 | // Duplicate instruction and move it the new latch. Update uses that | |||
1383 | // have been moved. | |||
1384 | Instruction *NewI = WorkList[i]->clone(); | |||
1385 | NewI->insertBefore(NewLatch->getFirstNonPHI()); | |||
1386 | assert(!NewI->mayHaveSideEffects() &&((void)0) | |||
1387 | "Moving instructions with side-effects may change behavior of "((void)0) | |||
1388 | "the loop nest!")((void)0); | |||
1389 | for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) { | |||
1390 | Instruction *UserI = cast<Instruction>(U.getUser()); | |||
1391 | if (!InnerLoop->contains(UserI->getParent()) || | |||
1392 | UserI->getParent() == NewLatch || UserI == InductionPHI) | |||
1393 | U.set(NewI); | |||
1394 | } | |||
1395 | // Add operands of moved instruction to the worklist, except if they are | |||
1396 | // outside the inner loop or are the induction PHI. | |||
1397 | for (Value *Op : WorkList[i]->operands()) { | |||
1398 | Instruction *OpI = dyn_cast<Instruction>(Op); | |||
1399 | if (!OpI || | |||
1400 | this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop || | |||
1401 | OpI == InductionPHI) | |||
1402 | continue; | |||
1403 | WorkList.insert(OpI); | |||
1404 | } | |||
1405 | } | |||
1406 | }; | |||
1407 | ||||
1408 | // FIXME: Should we interchange when we have a constant condition? | |||
1409 | Instruction *CondI = dyn_cast<Instruction>( | |||
1410 | cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator()) | |||
1411 | ->getCondition()); | |||
1412 | if (CondI) | |||
1413 | WorkList.insert(CondI); | |||
1414 | MoveInstructions(); | |||
1415 | WorkList.insert(cast<Instruction>(InnerIndexVar)); | |||
1416 | MoveInstructions(); | |||
1417 | ||||
1418 | // Splits the inner loops phi nodes out into a separate basic block. | |||
1419 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); | |||
1420 | SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); | |||
1421 | LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n")do { } while (false); | |||
1422 | } | |||
1423 | ||||
1424 | // Instructions in the original inner loop preheader may depend on values | |||
1425 | // defined in the outer loop header. Move them there, because the original | |||
1426 | // inner loop preheader will become the entry into the interchanged loop nest. | |||
1427 | // Currently we move all instructions and rely on LICM to move invariant | |||
1428 | // instructions outside the loop nest. | |||
1429 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | |||
1430 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); | |||
1431 | if (InnerLoopPreHeader != OuterLoopHeader) { | |||
1432 | SmallPtrSet<Instruction *, 4> NeedsMoving; | |||
1433 | for (Instruction &I : | |||
1434 | make_early_inc_range(make_range(InnerLoopPreHeader->begin(), | |||
1435 | std::prev(InnerLoopPreHeader->end())))) | |||
1436 | I.moveBefore(OuterLoopHeader->getTerminator()); | |||
1437 | } | |||
1438 | ||||
1439 | Transformed |= adjustLoopLinks(); | |||
1440 | if (!Transformed) { | |||
1441 | LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n")do { } while (false); | |||
1442 | return false; | |||
1443 | } | |||
1444 | ||||
1445 | return true; | |||
1446 | } | |||
1447 | ||||
1448 | /// \brief Move all instructions except the terminator from FromBB right before | |||
1449 | /// InsertBefore | |||
1450 | static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { | |||
1451 | auto &ToList = InsertBefore->getParent()->getInstList(); | |||
1452 | auto &FromList = FromBB->getInstList(); | |||
1453 | ||||
1454 | ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), | |||
1455 | FromBB->getTerminator()->getIterator()); | |||
1456 | } | |||
1457 | ||||
1458 | /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact. | |||
1459 | static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) { | |||
1460 | // Save all non-terminator instructions of BB1 into TempInstrs and unlink them | |||
1461 | // from BB1 afterwards. | |||
1462 | auto Iter = map_range(*BB1, [](Instruction &I) { return &I; }); | |||
1463 | SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end())); | |||
1464 | for (Instruction *I : TempInstrs) | |||
1465 | I->removeFromParent(); | |||
1466 | ||||
1467 | // Move instructions from BB2 to BB1. | |||
1468 | moveBBContents(BB2, BB1->getTerminator()); | |||
1469 | ||||
1470 | // Move instructions from TempInstrs to BB2. | |||
1471 | for (Instruction *I : TempInstrs) | |||
1472 | I->insertBefore(BB2->getTerminator()); | |||
1473 | } | |||
1474 | ||||
1475 | // Update BI to jump to NewBB instead of OldBB. Records updates to the | |||
1476 | // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that | |||
1477 | // \p OldBB is exactly once in BI's successor list. | |||
1478 | static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, | |||
1479 | BasicBlock *NewBB, | |||
1480 | std::vector<DominatorTree::UpdateType> &DTUpdates, | |||
1481 | bool MustUpdateOnce = true) { | |||
1482 | assert((!MustUpdateOnce ||((void)0) | |||
1483 | llvm::count_if(successors(BI),((void)0) | |||
1484 | [OldBB](BasicBlock *BB) {((void)0) | |||
1485 | return BB == OldBB;((void)0) | |||
1486 | }) == 1) && "BI must jump to OldBB exactly once.")((void)0); | |||
1487 | bool Changed = false; | |||
1488 | for (Use &Op : BI->operands()) | |||
1489 | if (Op == OldBB) { | |||
1490 | Op.set(NewBB); | |||
1491 | Changed = true; | |||
1492 | } | |||
1493 | ||||
1494 | if (Changed) { | |||
1495 | DTUpdates.push_back( | |||
1496 | {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); | |||
1497 | DTUpdates.push_back( | |||
1498 | {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); | |||
1499 | } | |||
1500 | assert(Changed && "Expected a successor to be updated")((void)0); | |||
1501 | } | |||
1502 | ||||
1503 | // Move Lcssa PHIs to the right place. | |||
1504 | static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, | |||
1505 | BasicBlock *InnerLatch, BasicBlock *OuterHeader, | |||
1506 | BasicBlock *OuterLatch, BasicBlock *OuterExit, | |||
1507 | Loop *InnerLoop, LoopInfo *LI) { | |||
1508 | ||||
1509 | // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are | |||
1510 | // defined either in the header or latch. Those blocks will become header and | |||
1511 | // latch of the new outer loop, and the only possible users can PHI nodes | |||
1512 | // in the exit block of the loop nest or the outer loop header (reduction | |||
1513 | // PHIs, in that case, the incoming value must be defined in the inner loop | |||
1514 | // header). We can just substitute the user with the incoming value and remove | |||
1515 | // the PHI. | |||
1516 | for (PHINode &P : make_early_inc_range(InnerExit->phis())) { | |||
1517 | assert(P.getNumIncomingValues() == 1 &&((void)0) | |||
1518 | "Only loops with a single exit are supported!")((void)0); | |||
1519 | ||||
1520 | // Incoming values are guaranteed be instructions currently. | |||
1521 | auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch)); | |||
1522 | // Skip phis with incoming values from the inner loop body, excluding the | |||
1523 | // header and latch. | |||
1524 | if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader) | |||
1525 | continue; | |||
1526 | ||||
1527 | assert(all_of(P.users(),((void)0) | |||
1528 | [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {((void)0) | |||
1529 | return (cast<PHINode>(U)->getParent() == OuterHeader &&((void)0) | |||
1530 | IncI->getParent() == InnerHeader) ||((void)0) | |||
1531 | cast<PHINode>(U)->getParent() == OuterExit;((void)0) | |||
1532 | }) &&((void)0) | |||
1533 | "Can only replace phis iff the uses are in the loop nest exit or "((void)0) | |||
1534 | "the incoming value is defined in the inner header (it will "((void)0) | |||
1535 | "dominate all loop blocks after interchanging)")((void)0); | |||
1536 | P.replaceAllUsesWith(IncI); | |||
1537 | P.eraseFromParent(); | |||
1538 | } | |||
1539 | ||||
1540 | SmallVector<PHINode *, 8> LcssaInnerExit; | |||
1541 | for (PHINode &P : InnerExit->phis()) | |||
1542 | LcssaInnerExit.push_back(&P); | |||
1543 | ||||
1544 | SmallVector<PHINode *, 8> LcssaInnerLatch; | |||
1545 | for (PHINode &P : InnerLatch->phis()) | |||
1546 | LcssaInnerLatch.push_back(&P); | |||
1547 | ||||
1548 | // Lcssa PHIs for values used outside the inner loop are in InnerExit. | |||
1549 | // If a PHI node has users outside of InnerExit, it has a use outside the | |||
1550 | // interchanged loop and we have to preserve it. We move these to | |||
1551 | // InnerLatch, which will become the new exit block for the innermost | |||
1552 | // loop after interchanging. | |||
1553 | for (PHINode *P : LcssaInnerExit) | |||
| ||||
1554 | P->moveBefore(InnerLatch->getFirstNonPHI()); | |||
1555 | ||||
1556 | // If the inner loop latch contains LCSSA PHIs, those come from a child loop | |||
1557 | // and we have to move them to the new inner latch. | |||
1558 | for (PHINode *P : LcssaInnerLatch) | |||
1559 | P->moveBefore(InnerExit->getFirstNonPHI()); | |||
1560 | ||||
1561 | // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have | |||
1562 | // incoming values defined in the outer loop, we have to add a new PHI | |||
1563 | // in the inner loop latch, which became the exit block of the outer loop, | |||
1564 | // after interchanging. | |||
1565 | if (OuterExit) { | |||
1566 | for (PHINode &P : OuterExit->phis()) { | |||
1567 | if (P.getNumIncomingValues() != 1) | |||
1568 | continue; | |||
1569 | // Skip Phis with incoming values defined in the inner loop. Those should | |||
1570 | // already have been updated. | |||
1571 | auto I = dyn_cast<Instruction>(P.getIncomingValue(0)); | |||
1572 | if (!I || LI->getLoopFor(I->getParent()) == InnerLoop) | |||
1573 | continue; | |||
1574 | ||||
1575 | PHINode *NewPhi = dyn_cast<PHINode>(P.clone()); | |||
1576 | NewPhi->setIncomingValue(0, P.getIncomingValue(0)); | |||
| ||||
1577 | NewPhi->setIncomingBlock(0, OuterLatch); | |||
1578 | // We might have incoming edges from other BBs, i.e., the original outer | |||
1579 | // header. | |||
1580 | for (auto *Pred : predecessors(InnerLatch)) { | |||
1581 | if (Pred == OuterLatch) | |||
1582 | continue; | |||
1583 | NewPhi->addIncoming(P.getIncomingValue(0), Pred); | |||
1584 | } | |||
1585 | NewPhi->insertBefore(InnerLatch->getFirstNonPHI()); | |||
1586 | P.setIncomingValue(0, NewPhi); | |||
1587 | } | |||
1588 | } | |||
1589 | ||||
1590 | // Now adjust the incoming blocks for the LCSSA PHIs. | |||
1591 | // For PHIs moved from Inner's exit block, we need to replace Inner's latch | |||
1592 | // with the new latch. | |||
1593 | InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch); | |||
1594 | } | |||
1595 | ||||
1596 | bool LoopInterchangeTransform::adjustLoopBranches() { | |||
1597 | LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n")do { } while (false); | |||
1598 | std::vector<DominatorTree::UpdateType> DTUpdates; | |||
1599 | ||||
1600 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); | |||
1601 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | |||
1602 | ||||
1603 | assert(OuterLoopPreHeader != OuterLoop->getHeader() &&((void)0) | |||
1604 | InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&((void)0) | |||
1605 | InnerLoopPreHeader && "Guaranteed by loop-simplify form")((void)0); | |||
1606 | // Ensure that both preheaders do not contain PHI nodes and have single | |||
1607 | // predecessors. This allows us to move them easily. We use | |||
1608 | // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing | |||
1609 | // preheaders do not satisfy those conditions. | |||
1610 | if (isa<PHINode>(OuterLoopPreHeader->begin()) || | |||
1611 | !OuterLoopPreHeader->getUniquePredecessor()) | |||
1612 | OuterLoopPreHeader = | |||
1613 | InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true); | |||
1614 | if (InnerLoopPreHeader == OuterLoop->getHeader()) | |||
1615 | InnerLoopPreHeader = | |||
1616 | InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true); | |||
1617 | ||||
1618 | // Adjust the loop preheader | |||
1619 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); | |||
1620 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); | |||
1621 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); | |||
1622 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); | |||
1623 | BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); | |||
1624 | BasicBlock *InnerLoopLatchPredecessor = | |||
1625 | InnerLoopLatch->getUniquePredecessor(); | |||
1626 | BasicBlock *InnerLoopLatchSuccessor; | |||
1627 | BasicBlock *OuterLoopLatchSuccessor; | |||
1628 | ||||
1629 | BranchInst *OuterLoopLatchBI = | |||
1630 | dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); | |||
1631 | BranchInst *InnerLoopLatchBI = | |||
1632 | dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); | |||
1633 | BranchInst *OuterLoopHeaderBI = | |||
1634 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); | |||
1635 | BranchInst *InnerLoopHeaderBI = | |||
1636 | dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); | |||
1637 | ||||
1638 | if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || | |||
1639 | !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || | |||
1640 | !InnerLoopHeaderBI) | |||
1641 | return false; | |||
1642 | ||||
1643 | BranchInst *InnerLoopLatchPredecessorBI = | |||
1644 | dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); | |||
1645 | BranchInst *OuterLoopPredecessorBI = | |||
1646 | dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); | |||
1647 | ||||
1648 | if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) | |||
1649 | return false; | |||
1650 | BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); | |||
1651 | if (!InnerLoopHeaderSuccessor) | |||
1652 | return false; | |||
1653 | ||||
1654 | // Adjust Loop Preheader and headers. | |||
1655 | // The branches in the outer loop predecessor and the outer loop header can | |||
1656 | // be unconditional branches or conditional branches with duplicates. Consider | |||
1657 | // this when updating the successors. | |||
1658 | updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, | |||
1659 | InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); | |||
1660 | // The outer loop header might or might not branch to the outer latch. | |||
1661 | // We are guaranteed to branch to the inner loop preheader. | |||
1662 | if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) { | |||
1663 | // In this case the outerLoopHeader should branch to the InnerLoopLatch. | |||
1664 | updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch, | |||
1665 | DTUpdates, | |||
1666 | /*MustUpdateOnce=*/false); | |||
1667 | } | |||
1668 | updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, | |||
1669 | InnerLoopHeaderSuccessor, DTUpdates, | |||
1670 | /*MustUpdateOnce=*/false); | |||
1671 | ||||
1672 | // Adjust reduction PHI's now that the incoming block has changed. | |||
1673 | InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader, | |||
1674 | OuterLoopHeader); | |||
1675 | ||||
1676 | updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor, | |||
1677 | OuterLoopPreHeader, DTUpdates); | |||
1678 | ||||
1679 | // -------------Adjust loop latches----------- | |||
1680 | if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) | |||
1681 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); | |||
1682 | else | |||
1683 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); | |||
1684 | ||||
1685 | updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, | |||
1686 | InnerLoopLatchSuccessor, DTUpdates); | |||
1687 | ||||
1688 | ||||
1689 | if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) | |||
1690 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); | |||
1691 | else | |||
1692 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); | |||
1693 | ||||
1694 | updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor, | |||
1695 | OuterLoopLatchSuccessor, DTUpdates); | |||
1696 | updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, | |||
1697 | DTUpdates); | |||
1698 | ||||
1699 | DT->applyUpdates(DTUpdates); | |||
1700 | restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader, | |||
1701 | OuterLoopPreHeader); | |||
1702 | ||||
1703 | moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch, | |||
1704 | OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(), | |||
1705 | InnerLoop, LI); | |||
1706 | // For PHIs in the exit block of the outer loop, outer's latch has been | |||
1707 | // replaced by Inners'. | |||
1708 | OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); | |||
1709 | ||||
1710 | auto &OuterInnerReductions = LIL.getOuterInnerReductions(); | |||
1711 | // Now update the reduction PHIs in the inner and outer loop headers. | |||
1712 | SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; | |||
1713 | for (PHINode &PHI : InnerLoopHeader->phis()) { | |||
1714 | if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) | |||
1715 | continue; | |||
1716 | InnerLoopPHIs.push_back(cast<PHINode>(&PHI)); | |||
1717 | } | |||
1718 | for (PHINode &PHI : OuterLoopHeader->phis()) { | |||
1719 | if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) | |||
1720 | continue; | |||
1721 | OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); | |||
1722 | } | |||
1723 | ||||
1724 | // Now move the remaining reduction PHIs from outer to inner loop header and | |||
1725 | // vice versa. The PHI nodes must be part of a reduction across the inner and | |||
1726 | // outer loop and all the remains to do is and updating the incoming blocks. | |||
1727 | for (PHINode *PHI : OuterLoopPHIs) { | |||
1728 | PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); | |||
1729 | assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")((void)0); | |||
1730 | } | |||
1731 | for (PHINode *PHI : InnerLoopPHIs) { | |||
1732 | PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); | |||
1733 | assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node")((void)0); | |||
1734 | } | |||
1735 | ||||
1736 | // Update the incoming blocks for moved PHI nodes. | |||
1737 | OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader); | |||
1738 | OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch); | |||
1739 | InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader); | |||
1740 | InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); | |||
1741 | ||||
1742 | // Values defined in the outer loop header could be used in the inner loop | |||
1743 | // latch. In that case, we need to create LCSSA phis for them, because after | |||
1744 | // interchanging they will be defined in the new inner loop and used in the | |||
1745 | // new outer loop. | |||
1746 | IRBuilder<> Builder(OuterLoopHeader->getContext()); | |||
1747 | SmallVector<Instruction *, 4> MayNeedLCSSAPhis; | |||
1748 | for (Instruction &I : | |||
1749 | make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end()))) | |||
1750 | MayNeedLCSSAPhis.push_back(&I); | |||
1751 | formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder); | |||
1752 | ||||
1753 | return true; | |||
1754 | } | |||
1755 | ||||
1756 | bool LoopInterchangeTransform::adjustLoopLinks() { | |||
1757 | // Adjust all branches in the inner and outer loop. | |||
1758 | bool Changed = adjustLoopBranches(); | |||
1759 | if (Changed) { | |||
1760 | // We have interchanged the preheaders so we need to interchange the data in | |||
1761 | // the preheaders as well. This is because the content of the inner | |||
1762 | // preheader was previously executed inside the outer loop. | |||
1763 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); | |||
1764 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | |||
1765 | swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader); | |||
1766 | } | |||
1767 | return Changed; | |||
1768 | } | |||
1769 | ||||
1770 | /// Main LoopInterchange Pass. | |||
1771 | struct LoopInterchangeLegacyPass : public LoopPass { | |||
1772 | static char ID; | |||
1773 | ||||
1774 | LoopInterchangeLegacyPass() : LoopPass(ID) { | |||
1775 | initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry()); | |||
1776 | } | |||
1777 | ||||
1778 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
1779 | AU.addRequired<DependenceAnalysisWrapperPass>(); | |||
1780 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); | |||
1781 | ||||
1782 | getLoopAnalysisUsage(AU); | |||
1783 | } | |||
1784 | ||||
1785 | bool runOnLoop(Loop *L, LPPassManager &LPM) override { | |||
1786 | if (skipLoop(L)) | |||
1787 | return false; | |||
1788 | ||||
1789 | auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); | |||
1790 | auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | |||
1791 | auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); | |||
1792 | auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
1793 | auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); | |||
1794 | ||||
1795 | return LoopInterchange(SE, LI, DI, DT, ORE).run(L); | |||
1796 | } | |||
1797 | }; | |||
1798 | ||||
1799 | char LoopInterchangeLegacyPass::ID = 0; | |||
1800 | ||||
1801 | INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry &Registry) { | |||
1802 | "Interchanges loops for cache reuse", false, false)static void *initializeLoopInterchangeLegacyPassPassOnce(PassRegistry &Registry) { | |||
1803 | INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry); | |||
1804 | INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)initializeDependenceAnalysisWrapperPassPass(Registry); | |||
1805 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)initializeOptimizationRemarkEmitterWrapperPassPass(Registry); | |||
1806 | ||||
1807 | INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse" , "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo ::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass> ), false, false); Registry.registerPass(*PI, true); return PI ; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag ; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag , initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry )); } | |||
1808 | "Interchanges loops for cache reuse", false, false)PassInfo *PI = new PassInfo( "Interchanges loops for cache reuse" , "loop-interchange", &LoopInterchangeLegacyPass::ID, PassInfo ::NormalCtor_t(callDefaultCtor<LoopInterchangeLegacyPass> ), false, false); Registry.registerPass(*PI, true); return PI ; } static llvm::once_flag InitializeLoopInterchangeLegacyPassPassFlag ; void llvm::initializeLoopInterchangeLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopInterchangeLegacyPassPassFlag , initializeLoopInterchangeLegacyPassPassOnce, std::ref(Registry )); } | |||
1809 | ||||
1810 | Pass *llvm::createLoopInterchangePass() { | |||
1811 | return new LoopInterchangeLegacyPass(); | |||
1812 | } | |||
1813 | ||||
1814 | PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, | |||
1815 | LoopAnalysisManager &AM, | |||
1816 | LoopStandardAnalysisResults &AR, | |||
1817 | LPMUpdater &U) { | |||
1818 | Function &F = *LN.getParent(); | |||
1819 | ||||
1820 | DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); | |||
1821 | OptimizationRemarkEmitter ORE(&F); | |||
1822 | if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN)) | |||
1823 | return PreservedAnalyses::all(); | |||
1824 | return getLoopPassPreservedAnalyses(); | |||
1825 | } |