clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LoopCacheAnalysis.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Analysis/LoopCacheAnalysis.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | #include "llvm/Analysis/LoopCacheAnalysis.h" |
29 | #include "llvm/ADT/BreadthFirstIterator.h" |
30 | #include "llvm/ADT/Sequence.h" |
31 | #include "llvm/ADT/SmallVector.h" |
32 | #include "llvm/Analysis/AliasAnalysis.h" |
33 | #include "llvm/Analysis/DependenceAnalysis.h" |
34 | #include "llvm/Analysis/LoopInfo.h" |
35 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
36 | #include "llvm/Analysis/TargetTransformInfo.h" |
37 | #include "llvm/Support/CommandLine.h" |
38 | #include "llvm/Support/Debug.h" |
39 | |
40 | using namespace llvm; |
41 | |
42 | #define DEBUG_TYPE "loop-cache-cost" |
43 | |
44 | static cl::opt<unsigned> DefaultTripCount( |
45 | "default-trip-count", cl::init(100), cl::Hidden, |
46 | cl::desc("Use this to specify the default trip count of a loop")); |
47 | |
48 | |
49 | |
50 | |
51 | static cl::opt<unsigned> TemporalReuseThreshold( |
52 | "temporal-reuse-threshold", cl::init(2), cl::Hidden, |
53 | cl::desc("Use this to specify the max. distance between array elements " |
54 | "accessed in a loop so that the elements are classified to have " |
55 | "temporal reuse")); |
56 | |
57 | |
58 | |
59 | |
60 | |
61 | static Loop *getInnerMostLoop(const LoopVectorTy &Loops) { |
62 | assert(!Loops.empty() && "Expecting a non-empy loop vector"); |
63 | |
64 | Loop *LastLoop = Loops.back(); |
65 | Loop *ParentLoop = LastLoop->getParentLoop(); |
66 | |
67 | if (ParentLoop == nullptr) { |
68 | assert(Loops.size() == 1 && "Expecting a single loop"); |
69 | return LastLoop; |
70 | } |
71 | |
72 | return (llvm::is_sorted(Loops, |
73 | [](const Loop *L1, const Loop *L2) { |
74 | return L1->getLoopDepth() < L2->getLoopDepth(); |
75 | })) |
76 | ? LastLoop |
77 | : nullptr; |
78 | } |
79 | |
80 | static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, |
81 | const Loop &L, ScalarEvolution &SE) { |
82 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn); |
83 | if (!AR || !AR->isAffine()) |
84 | return false; |
85 | |
86 | assert(AR->getLoop() && "AR should have a loop"); |
87 | |
88 | |
89 | const SCEV *Start = AR->getStart(); |
90 | const SCEV *Step = AR->getStepRecurrence(SE); |
91 | if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step)) |
92 | return false; |
93 | |
94 | |
95 | if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) |
96 | return false; |
97 | |
98 | const SCEV *StepRec = AR->getStepRecurrence(SE); |
99 | if (StepRec && SE.isKnownNegative(StepRec)) |
100 | StepRec = SE.getNegativeSCEV(StepRec); |
101 | |
102 | return StepRec == &ElemSize; |
103 | } |
104 | |
105 | |
106 | |
107 | static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) { |
108 | const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); |
109 | if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || |
110 | !isa<SCEVConstant>(BackedgeTakenCount)) |
111 | return nullptr; |
112 | return SE.getTripCountFromExitCount(BackedgeTakenCount); |
113 | } |
114 | |
115 | |
116 | |
117 | |
118 | raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) { |
119 | if (!R.IsValid) { |
120 | OS << R.StoreOrLoadInst; |
121 | OS << ", IsValid=false."; |
122 | return OS; |
123 | } |
124 | |
125 | OS << *R.BasePointer; |
126 | for (const SCEV *Subscript : R.Subscripts) |
127 | OS << "[" << *Subscript << "]"; |
128 | |
129 | OS << ", Sizes: "; |
130 | for (const SCEV *Size : R.Sizes) |
131 | OS << "[" << *Size << "]"; |
132 | |
133 | return OS; |
134 | } |
135 | |
136 | IndexedReference::IndexedReference(Instruction &StoreOrLoadInst, |
137 | const LoopInfo &LI, ScalarEvolution &SE) |
138 | : StoreOrLoadInst(StoreOrLoadInst), SE(SE) { |
139 | assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) && |
140 | "Expecting a load or store instruction"); |
141 | |
142 | IsValid = delinearize(LI); |
143 | if (IsValid) |
144 | LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this |
145 | << "\n"); |
146 | } |
147 | |
148 | Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other, |
149 | unsigned CLS, |
150 | AAResults &AA) const { |
151 | assert(IsValid && "Expecting a valid reference"); |
152 | |
153 | if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { |
154 | LLVM_DEBUG(dbgs().indent(2) |
155 | << "No spacial reuse: different base pointers\n"); |
156 | return false; |
157 | } |
158 | |
159 | unsigned NumSubscripts = getNumSubscripts(); |
160 | if (NumSubscripts != Other.getNumSubscripts()) { |
161 | LLVM_DEBUG(dbgs().indent(2) |
162 | << "No spacial reuse: different number of subscripts\n"); |
163 | return false; |
164 | } |
165 | |
166 | |
167 | for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) { |
168 | if (getSubscript(SubNum) != Other.getSubscript(SubNum)) { |
169 | LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: " |
170 | << "\n\t" << *getSubscript(SubNum) << "\n\t" |
171 | << *Other.getSubscript(SubNum) << "\n"); |
172 | return false; |
173 | } |
174 | } |
175 | |
176 | |
177 | |
178 | const SCEV *LastSubscript = getLastSubscript(); |
179 | const SCEV *OtherLastSubscript = Other.getLastSubscript(); |
180 | const SCEVConstant *Diff = dyn_cast<SCEVConstant>( |
181 | SE.getMinusSCEV(LastSubscript, OtherLastSubscript)); |
182 | |
183 | if (Diff == nullptr) { |
184 | LLVM_DEBUG(dbgs().indent(2) |
185 | << "No spacial reuse, difference between subscript:\n\t" |
186 | << *LastSubscript << "\n\t" << OtherLastSubscript |
187 | << "\nis not constant.\n"); |
188 | return None; |
189 | } |
190 | |
191 | bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS); |
192 | |
193 | LLVM_DEBUG({ |
194 | if (InSameCacheLine) |
195 | dbgs().indent(2) << "Found spacial reuse.\n"; |
196 | else |
197 | dbgs().indent(2) << "No spacial reuse.\n"; |
198 | }); |
199 | |
200 | return InSameCacheLine; |
201 | } |
202 | |
203 | Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other, |
204 | unsigned MaxDistance, |
205 | const Loop &L, |
206 | DependenceInfo &DI, |
207 | AAResults &AA) const { |
208 | assert(IsValid && "Expecting a valid reference"); |
209 | |
210 | if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { |
211 | LLVM_DEBUG(dbgs().indent(2) |
212 | << "No temporal reuse: different base pointer\n"); |
213 | return false; |
214 | } |
215 | |
216 | std::unique_ptr<Dependence> D = |
217 | DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true); |
218 | |
219 | if (D == nullptr) { |
220 | LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n"); |
221 | return false; |
222 | } |
223 | |
224 | if (D->isLoopIndependent()) { |
225 | LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); |
226 | return true; |
227 | } |
228 | |
229 | |
230 | |
231 | |
232 | int LoopDepth = L.getLoopDepth(); |
233 | int Levels = D->getLevels(); |
234 | for (int Level = 1; Level <= Levels; ++Level) { |
235 | const SCEV *Distance = D->getDistance(Level); |
236 | const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance); |
237 | |
238 | if (SCEVConst == nullptr) { |
239 | LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n"); |
240 | return None; |
241 | } |
242 | |
243 | const ConstantInt &CI = *SCEVConst->getValue(); |
244 | if (Level != LoopDepth && !CI.isZero()) { |
245 | LLVM_DEBUG(dbgs().indent(2) |
246 | << "No temporal reuse: distance is not zero at depth=" << Level |
247 | << "\n"); |
248 | return false; |
249 | } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) { |
250 | LLVM_DEBUG( |
251 | dbgs().indent(2) |
252 | << "No temporal reuse: distance is greater than MaxDistance at depth=" |
253 | << Level << "\n"); |
254 | return false; |
255 | } |
256 | } |
257 | |
258 | LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); |
259 | return true; |
260 | } |
261 | |
262 | CacheCostTy IndexedReference::computeRefCost(const Loop &L, |
263 | unsigned CLS) const { |
264 | assert(IsValid && "Expecting a valid reference"); |
265 | LLVM_DEBUG({ |
| 1 | Loop condition is false. Exiting loop | |
|
266 | dbgs().indent(2) << "Computing cache cost for:\n"; |
267 | dbgs().indent(4) << *this << "\n"; |
268 | }); |
269 | |
270 | |
271 | if (isLoopInvariant(L)) { |
| 2 | | Assuming the condition is false | |
|
| |
272 | LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n"); |
273 | return 1; |
274 | } |
275 | |
276 | const SCEV *TripCount = computeTripCount(L, SE); |
277 | if (!TripCount) { |
| 4 | | Assuming 'TripCount' is non-null | |
|
| |
278 | LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() |
279 | << " could not be computed, using DefaultTripCount\n"); |
280 | const SCEV *ElemSize = Sizes.back(); |
281 | TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount); |
282 | } |
283 | LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); |
| 6 | | Loop condition is false. Exiting loop | |
|
284 | |
285 | |
286 | |
287 | const SCEV *RefCost = TripCount; |
288 | |
289 | if (isConsecutive(L, CLS)) { |
| 7 | | Calling 'IndexedReference::isConsecutive' | |
|
290 | const SCEV *Coeff = getLastCoefficient(); |
291 | const SCEV *ElemSize = Sizes.back(); |
292 | const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); |
293 | const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); |
294 | Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); |
295 | if (SE.isKnownNegative(Stride)) |
296 | Stride = SE.getNegativeSCEV(Stride); |
297 | Stride = SE.getNoopOrAnyExtend(Stride, WiderType); |
298 | TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); |
299 | const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); |
300 | RefCost = SE.getUDivExpr(Numerator, CacheLineSize); |
301 | |
302 | LLVM_DEBUG(dbgs().indent(4) |
303 | << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" |
304 | << *RefCost << "\n"); |
305 | } else |
306 | LLVM_DEBUG(dbgs().indent(4) |
307 | << "Access is not consecutive: RefCost=TripCount=" << *RefCost |
308 | << "\n"); |
309 | |
310 | |
311 | if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost)) |
312 | return ConstantCost->getValue()->getSExtValue(); |
313 | |
314 | LLVM_DEBUG(dbgs().indent(4) |
315 | << "RefCost is not a constant! Setting to RefCost=InvalidCost " |
316 | "(invalid value).\n"); |
317 | |
318 | return CacheCost::InvalidCost; |
319 | } |
320 | |
321 | bool IndexedReference::delinearize(const LoopInfo &LI) { |
322 | assert(Subscripts.empty() && "Subscripts should be empty"); |
323 | assert(Sizes.empty() && "Sizes should be empty"); |
324 | assert(!IsValid && "Should be called once from the constructor"); |
325 | LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n"); |
326 | |
327 | const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst); |
328 | const BasicBlock *BB = StoreOrLoadInst.getParent(); |
329 | |
330 | if (Loop *L = LI.getLoopFor(BB)) { |
331 | const SCEV *AccessFn = |
332 | SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L); |
333 | |
334 | BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn)); |
335 | if (BasePointer == nullptr) { |
336 | LLVM_DEBUG( |
337 | dbgs().indent(2) |
338 | << "ERROR: failed to delinearize, can't identify base pointer\n"); |
339 | return false; |
340 | } |
341 | |
342 | AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); |
343 | |
344 | LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() |
345 | << "', AccessFn: " << *AccessFn << "\n"); |
346 | |
347 | SE.delinearize(AccessFn, Subscripts, Sizes, |
348 | SE.getElementSize(&StoreOrLoadInst)); |
349 | |
350 | if (Subscripts.empty() || Sizes.empty() || |
351 | Subscripts.size() != Sizes.size()) { |
352 | |
353 | |
354 | if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { |
355 | LLVM_DEBUG(dbgs().indent(2) |
356 | << "ERROR: failed to delinearize reference\n"); |
357 | Subscripts.clear(); |
358 | Sizes.clear(); |
359 | return false; |
360 | } |
361 | |
362 | |
363 | |
364 | |
365 | |
366 | |
367 | const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn); |
368 | const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr; |
369 | |
370 | if (StepRec && SE.isKnownNegative(StepRec)) |
371 | AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(), |
372 | SE.getNegativeSCEV(StepRec), |
373 | AccessFnAR->getLoop(), |
374 | AccessFnAR->getNoWrapFlags()); |
375 | const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); |
376 | Subscripts.push_back(Div); |
377 | Sizes.push_back(ElemSize); |
378 | } |
379 | |
380 | return all_of(Subscripts, [&](const SCEV *Subscript) { |
381 | return isSimpleAddRecurrence(*Subscript, *L); |
382 | }); |
383 | } |
384 | |
385 | return false; |
386 | } |
387 | |
388 | bool IndexedReference::isLoopInvariant(const Loop &L) const { |
389 | Value *Addr = getPointerOperand(&StoreOrLoadInst); |
390 | assert(Addr != nullptr && "Expecting either a load or a store instruction"); |
391 | assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable"); |
392 | |
393 | if (SE.isLoopInvariant(SE.getSCEV(Addr), &L)) |
394 | return true; |
395 | |
396 | |
397 | |
398 | bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) { |
399 | return isCoeffForLoopZeroOrInvariant(*Subscript, L); |
400 | }); |
401 | |
402 | return allCoeffForLoopAreZero; |
403 | } |
404 | |
405 | bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { |
406 | |
407 | |
408 | const SCEV *LastSubscript = Subscripts.back(); |
409 | for (const SCEV *Subscript : Subscripts) { |
| 8 | | Assuming '__begin1' is equal to '__end1' | |
|
410 | if (Subscript == LastSubscript) |
411 | continue; |
412 | if (!isCoeffForLoopZeroOrInvariant(*Subscript, L)) |
413 | return false; |
414 | } |
415 | |
416 | |
417 | const SCEV *Coeff = getLastCoefficient(); |
| 9 | | Calling 'IndexedReference::getLastCoefficient' | |
|
418 | const SCEV *ElemSize = Sizes.back(); |
419 | const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); |
420 | const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); |
421 | |
422 | Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride; |
423 | return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); |
424 | } |
425 | |
426 | const SCEV *IndexedReference::getLastCoefficient() const { |
427 | const SCEV *LastSubscript = getLastSubscript(); |
428 | assert(isa<SCEVAddRecExpr>(LastSubscript) && |
429 | "Expecting a SCEV add recurrence expression"); |
430 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript); |
| 10 | | Assuming 'LastSubscript' is not a 'SCEVAddRecExpr' | |
|
| 11 | | 'AR' initialized to a null pointer value | |
|
431 | return AR->getStepRecurrence(SE); |
| 12 | | Called C++ object pointer is null |
|
432 | } |
433 | |
434 | bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, |
435 | const Loop &L) const { |
436 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript); |
437 | return (AR != nullptr) ? AR->getLoop() != &L |
438 | : SE.isLoopInvariant(&Subscript, &L); |
439 | } |
440 | |
441 | bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript, |
442 | const Loop &L) const { |
443 | if (!isa<SCEVAddRecExpr>(Subscript)) |
444 | return false; |
445 | |
446 | const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript); |
447 | assert(AR->getLoop() && "AR should have a loop"); |
448 | |
449 | if (!AR->isAffine()) |
450 | return false; |
451 | |
452 | const SCEV *Start = AR->getStart(); |
453 | const SCEV *Step = AR->getStepRecurrence(SE); |
454 | |
455 | if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) |
456 | return false; |
457 | |
458 | return true; |
459 | } |
460 | |
461 | bool IndexedReference::isAliased(const IndexedReference &Other, |
462 | AAResults &AA) const { |
463 | const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst); |
464 | const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst); |
465 | return AA.isMustAlias(Loc1, Loc2); |
466 | } |
467 | |
468 | |
469 | |
470 | |
471 | raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { |
472 | for (const auto &LC : CC.LoopCosts) { |
473 | const Loop *L = LC.first; |
474 | OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n"; |
475 | } |
476 | return OS; |
477 | } |
478 | |
479 | CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, |
480 | ScalarEvolution &SE, TargetTransformInfo &TTI, |
481 | AAResults &AA, DependenceInfo &DI, |
482 | Optional<unsigned> TRT) |
483 | : Loops(Loops), TripCounts(), LoopCosts(), |
484 | TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT), |
485 | LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { |
486 | assert(!Loops.empty() && "Expecting a non-empty loop vector."); |
487 | |
488 | for (const Loop *L : Loops) { |
489 | unsigned TripCount = SE.getSmallConstantTripCount(L); |
490 | TripCount = (TripCount == 0) ? DefaultTripCount : TripCount; |
491 | TripCounts.push_back({L, TripCount}); |
492 | } |
493 | |
494 | calculateCacheFootprint(); |
495 | } |
496 | |
497 | std::unique_ptr<CacheCost> |
498 | CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, |
499 | DependenceInfo &DI, Optional<unsigned> TRT) { |
500 | if (!Root.isOutermost()) { |
501 | LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); |
502 | return nullptr; |
503 | } |
504 | |
505 | LoopVectorTy Loops; |
506 | append_range(Loops, breadth_first(&Root)); |
507 | |
508 | if (!getInnerMostLoop(Loops)) { |
509 | LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more " |
510 | "than one innermost loop\n"); |
511 | return nullptr; |
512 | } |
513 | |
514 | return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT); |
515 | } |
516 | |
517 | void CacheCost::calculateCacheFootprint() { |
518 | LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n"); |
519 | ReferenceGroupsTy RefGroups; |
520 | if (!populateReferenceGroups(RefGroups)) |
521 | return; |
522 | |
523 | LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n"); |
524 | for (const Loop *L : Loops) { |
525 | assert((std::find_if(LoopCosts.begin(), LoopCosts.end(), |
526 | [L](const LoopCacheCostTy &LCC) { |
527 | return LCC.first == L; |
528 | }) == LoopCosts.end()) && |
529 | "Should not add duplicate element"); |
530 | CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups); |
531 | LoopCosts.push_back(std::make_pair(L, LoopCost)); |
532 | } |
533 | |
534 | sortLoopCosts(); |
535 | RefGroups.clear(); |
536 | } |
537 | |
538 | bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { |
539 | assert(RefGroups.empty() && "Reference groups should be empty"); |
540 | |
541 | unsigned CLS = TTI.getCacheLineSize(); |
542 | Loop *InnerMostLoop = getInnerMostLoop(Loops); |
543 | assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop"); |
544 | |
545 | for (BasicBlock *BB : InnerMostLoop->getBlocks()) { |
546 | for (Instruction &I : *BB) { |
547 | if (!isa<StoreInst>(I) && !isa<LoadInst>(I)) |
548 | continue; |
549 | |
550 | std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE)); |
551 | if (!R->isValid()) |
552 | continue; |
553 | |
554 | bool Added = false; |
555 | for (ReferenceGroupTy &RefGroup : RefGroups) { |
556 | const IndexedReference &Representative = *RefGroup.front().get(); |
557 | LLVM_DEBUG({ |
558 | dbgs() << "References:\n"; |
559 | dbgs().indent(2) << *R << "\n"; |
560 | dbgs().indent(2) << Representative << "\n"; |
561 | }); |
562 | |
563 | |
564 | |
565 | |
566 | |
567 | |
568 | |
569 | |
570 | |
571 | |
572 | |
573 | |
574 | |
575 | Optional<bool> HasTemporalReuse = |
576 | R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA); |
577 | Optional<bool> HasSpacialReuse = |
578 | R->hasSpacialReuse(Representative, CLS, AA); |
579 | |
580 | if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) || |
581 | (HasSpacialReuse.hasValue() && *HasSpacialReuse)) { |
582 | RefGroup.push_back(std::move(R)); |
583 | Added = true; |
584 | break; |
585 | } |
586 | } |
587 | |
588 | if (!Added) { |
589 | ReferenceGroupTy RG; |
590 | RG.push_back(std::move(R)); |
591 | RefGroups.push_back(std::move(RG)); |
592 | } |
593 | } |
594 | } |
595 | |
596 | if (RefGroups.empty()) |
597 | return false; |
598 | |
599 | LLVM_DEBUG({ |
600 | dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n"; |
601 | int n = 1; |
602 | for (const ReferenceGroupTy &RG : RefGroups) { |
603 | dbgs().indent(2) << "RefGroup " << n << ":\n"; |
604 | for (const auto &IR : RG) |
605 | dbgs().indent(4) << *IR << "\n"; |
606 | n++; |
607 | } |
608 | dbgs() << "\n"; |
609 | }); |
610 | |
611 | return true; |
612 | } |
613 | |
614 | CacheCostTy |
615 | CacheCost::computeLoopCacheCost(const Loop &L, |
616 | const ReferenceGroupsTy &RefGroups) const { |
617 | if (!L.isLoopSimplifyForm()) |
618 | return InvalidCost; |
619 | |
620 | LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName() |
621 | << "' as innermost loop.\n"); |
622 | |
623 | |
624 | CacheCostTy TripCountsProduct = 1; |
625 | for (const auto &TC : TripCounts) { |
626 | if (TC.first == &L) |
627 | continue; |
628 | TripCountsProduct *= TC.second; |
629 | } |
630 | |
631 | CacheCostTy LoopCost = 0; |
632 | for (const ReferenceGroupTy &RG : RefGroups) { |
633 | CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L); |
634 | LoopCost += RefGroupCost * TripCountsProduct; |
635 | } |
636 | |
637 | LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName() |
638 | << "' has cost=" << LoopCost << "\n"); |
639 | |
640 | return LoopCost; |
641 | } |
642 | |
643 | CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG, |
644 | const Loop &L) const { |
645 | assert(!RG.empty() && "Reference group should have at least one member."); |
646 | |
647 | const IndexedReference *Representative = RG.front().get(); |
648 | return Representative->computeRefCost(L, TTI.getCacheLineSize()); |
649 | } |
650 | |
651 | |
652 | |
653 | |
654 | PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM, |
655 | LoopStandardAnalysisResults &AR, |
656 | LPMUpdater &U) { |
657 | Function *F = L.getHeader()->getParent(); |
658 | DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); |
659 | |
660 | if (auto CC = CacheCost::getCacheCost(L, AR, DI)) |
661 | OS << *CC; |
662 | |
663 | return PreservedAnalyses::all(); |
664 | } |