File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/GlobalOpt.cpp
Warning: line 1831, column 27: Called C++ object pointer is null
1 | //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This pass transforms simple global variables that never have their address | |||
10 | // taken. If obviously true, it marks read/write globals as constant, deletes | |||
11 | // variables only stored to, etc. | |||
12 | // | |||
13 | //===----------------------------------------------------------------------===// | |||
14 | ||||
15 | #include "llvm/Transforms/IPO/GlobalOpt.h" | |||
16 | #include "llvm/ADT/DenseMap.h" | |||
17 | #include "llvm/ADT/STLExtras.h" | |||
18 | #include "llvm/ADT/SmallPtrSet.h" | |||
19 | #include "llvm/ADT/SmallVector.h" | |||
20 | #include "llvm/ADT/Statistic.h" | |||
21 | #include "llvm/ADT/Twine.h" | |||
22 | #include "llvm/ADT/iterator_range.h" | |||
23 | #include "llvm/Analysis/BlockFrequencyInfo.h" | |||
24 | #include "llvm/Analysis/ConstantFolding.h" | |||
25 | #include "llvm/Analysis/MemoryBuiltins.h" | |||
26 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
27 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
28 | #include "llvm/BinaryFormat/Dwarf.h" | |||
29 | #include "llvm/IR/Attributes.h" | |||
30 | #include "llvm/IR/BasicBlock.h" | |||
31 | #include "llvm/IR/CallingConv.h" | |||
32 | #include "llvm/IR/Constant.h" | |||
33 | #include "llvm/IR/Constants.h" | |||
34 | #include "llvm/IR/DataLayout.h" | |||
35 | #include "llvm/IR/DebugInfoMetadata.h" | |||
36 | #include "llvm/IR/DerivedTypes.h" | |||
37 | #include "llvm/IR/Dominators.h" | |||
38 | #include "llvm/IR/Function.h" | |||
39 | #include "llvm/IR/GetElementPtrTypeIterator.h" | |||
40 | #include "llvm/IR/GlobalAlias.h" | |||
41 | #include "llvm/IR/GlobalValue.h" | |||
42 | #include "llvm/IR/GlobalVariable.h" | |||
43 | #include "llvm/IR/IRBuilder.h" | |||
44 | #include "llvm/IR/InstrTypes.h" | |||
45 | #include "llvm/IR/Instruction.h" | |||
46 | #include "llvm/IR/Instructions.h" | |||
47 | #include "llvm/IR/IntrinsicInst.h" | |||
48 | #include "llvm/IR/Module.h" | |||
49 | #include "llvm/IR/Operator.h" | |||
50 | #include "llvm/IR/Type.h" | |||
51 | #include "llvm/IR/Use.h" | |||
52 | #include "llvm/IR/User.h" | |||
53 | #include "llvm/IR/Value.h" | |||
54 | #include "llvm/IR/ValueHandle.h" | |||
55 | #include "llvm/InitializePasses.h" | |||
56 | #include "llvm/Pass.h" | |||
57 | #include "llvm/Support/AtomicOrdering.h" | |||
58 | #include "llvm/Support/Casting.h" | |||
59 | #include "llvm/Support/CommandLine.h" | |||
60 | #include "llvm/Support/Debug.h" | |||
61 | #include "llvm/Support/ErrorHandling.h" | |||
62 | #include "llvm/Support/MathExtras.h" | |||
63 | #include "llvm/Support/raw_ostream.h" | |||
64 | #include "llvm/Transforms/IPO.h" | |||
65 | #include "llvm/Transforms/Utils/CtorUtils.h" | |||
66 | #include "llvm/Transforms/Utils/Evaluator.h" | |||
67 | #include "llvm/Transforms/Utils/GlobalStatus.h" | |||
68 | #include "llvm/Transforms/Utils/Local.h" | |||
69 | #include <cassert> | |||
70 | #include <cstdint> | |||
71 | #include <utility> | |||
72 | #include <vector> | |||
73 | ||||
74 | using namespace llvm; | |||
75 | ||||
76 | #define DEBUG_TYPE "globalopt" | |||
77 | ||||
78 | STATISTIC(NumMarked , "Number of globals marked constant") | |||
79 | STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr") | |||
80 | STATISTIC(NumSRA , "Number of aggregate globals broken into scalars") | |||
81 | STATISTIC(NumSubstitute,"Number of globals with initializers stored into them") | |||
82 | STATISTIC(NumDeleted , "Number of globals deleted") | |||
83 | STATISTIC(NumGlobUses , "Number of global uses devirtualized") | |||
84 | STATISTIC(NumLocalized , "Number of globals localized") | |||
85 | STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans") | |||
86 | STATISTIC(NumFastCallFns , "Number of functions converted to fastcc") | |||
87 | STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated") | |||
88 | STATISTIC(NumNestRemoved , "Number of nest attributes removed") | |||
89 | STATISTIC(NumAliasesResolved, "Number of global aliases resolved") | |||
90 | STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated") | |||
91 | STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed") | |||
92 | STATISTIC(NumInternalFunc, "Number of internal functions") | |||
93 | STATISTIC(NumColdCC, "Number of functions marked coldcc") | |||
94 | ||||
95 | static cl::opt<bool> | |||
96 | EnableColdCCStressTest("enable-coldcc-stress-test", | |||
97 | cl::desc("Enable stress test of coldcc by adding " | |||
98 | "calling conv to all internal functions."), | |||
99 | cl::init(false), cl::Hidden); | |||
100 | ||||
101 | static cl::opt<int> ColdCCRelFreq( | |||
102 | "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, | |||
103 | cl::desc( | |||
104 | "Maximum block frequency, expressed as a percentage of caller's " | |||
105 | "entry frequency, for a call site to be considered cold for enabling" | |||
106 | "coldcc")); | |||
107 | ||||
108 | /// Is this global variable possibly used by a leak checker as a root? If so, | |||
109 | /// we might not really want to eliminate the stores to it. | |||
110 | static bool isLeakCheckerRoot(GlobalVariable *GV) { | |||
111 | // A global variable is a root if it is a pointer, or could plausibly contain | |||
112 | // a pointer. There are two challenges; one is that we could have a struct | |||
113 | // that has an inner member which is a pointer. We recurse through the type to | |||
114 | // detect these (up to a point). The other is that we may actually be a union | |||
115 | // of a pointer and another type, and so our LLVM type is an integer which | |||
116 | // gets converted into a pointer, or our type is an [i8 x #] with a pointer | |||
117 | // potentially contained here. | |||
118 | ||||
119 | if (GV->hasPrivateLinkage()) | |||
120 | return false; | |||
121 | ||||
122 | SmallVector<Type *, 4> Types; | |||
123 | Types.push_back(GV->getValueType()); | |||
124 | ||||
125 | unsigned Limit = 20; | |||
126 | do { | |||
127 | Type *Ty = Types.pop_back_val(); | |||
128 | switch (Ty->getTypeID()) { | |||
129 | default: break; | |||
130 | case Type::PointerTyID: | |||
131 | return true; | |||
132 | case Type::FixedVectorTyID: | |||
133 | case Type::ScalableVectorTyID: | |||
134 | if (cast<VectorType>(Ty)->getElementType()->isPointerTy()) | |||
135 | return true; | |||
136 | break; | |||
137 | case Type::ArrayTyID: | |||
138 | Types.push_back(cast<ArrayType>(Ty)->getElementType()); | |||
139 | break; | |||
140 | case Type::StructTyID: { | |||
141 | StructType *STy = cast<StructType>(Ty); | |||
142 | if (STy->isOpaque()) return true; | |||
143 | for (StructType::element_iterator I = STy->element_begin(), | |||
144 | E = STy->element_end(); I != E; ++I) { | |||
145 | Type *InnerTy = *I; | |||
146 | if (isa<PointerType>(InnerTy)) return true; | |||
147 | if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) || | |||
148 | isa<VectorType>(InnerTy)) | |||
149 | Types.push_back(InnerTy); | |||
150 | } | |||
151 | break; | |||
152 | } | |||
153 | } | |||
154 | if (--Limit == 0) return true; | |||
155 | } while (!Types.empty()); | |||
156 | return false; | |||
157 | } | |||
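| // For instance, a global of type { i32, i8* } is treated as a root because | |||
| // of its pointer member, while a global of plain i32 type is not. | |||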
158 | ||||
159 | /// Given a value that is stored to a global but never read, determine whether | |||
160 | /// it's safe to remove the store and the chain of computation that feeds the | |||
161 | /// store. | |||
162 | static bool IsSafeComputationToRemove( | |||
163 | Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
164 | do { | |||
165 | if (isa<Constant>(V)) | |||
166 | return true; | |||
167 | if (!V->hasOneUse()) | |||
168 | return false; | |||
169 | if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || | |||
170 | isa<GlobalValue>(V)) | |||
171 | return false; | |||
172 | if (isAllocationFn(V, GetTLI)) | |||
173 | return true; | |||
174 | ||||
175 | Instruction *I = cast<Instruction>(V); | |||
176 | if (I->mayHaveSideEffects()) | |||
177 | return false; | |||
178 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { | |||
179 | if (!GEP->hasAllConstantIndices()) | |||
180 | return false; | |||
181 | } else if (I->getNumOperands() != 1) { | |||
182 | return false; | |||
183 | } | |||
184 | ||||
185 | V = I->getOperand(0); | |||
186 | } while (true); | |||
187 | } | |||
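| // An illustrative chain this accepts: a single-use "gep inbounds (malloc), | |||
| // 0, 1" feeding the dead store -- the indices are constant, nothing has side | |||
| // effects, and the walk terminates at the allocation call. | |||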
188 | ||||
189 | /// This GV is a pointer root. Loop over all users of the global and clean up | |||
190 | /// any that we can remove without hiding a dynamically allocated value from | |||
191 | /// a leak checker. | |||
192 | static bool | |||
193 | CleanupPointerRootUsers(GlobalVariable *GV, | |||
194 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
195 | // A brief explanation of leak checkers. The goal is to find bugs where | |||
196 | // pointers are forgotten, causing an accumulating growth in memory | |||
197 | // usage over time. The common strategy for leak checkers is to explicitly | |||
198 | // allow the memory pointed to by globals at exit. This is popular because it | |||
199 | // also solves another problem where the main thread of a C++ program may shut | |||
200 | // down before other threads that are still expecting to use those globals. To | |||
201 | // handle that case, we expect the program may create a singleton and never | |||
202 | // destroy it. | |||
203 | ||||
204 | bool Changed = false; | |||
205 | ||||
206 | // If Dead[n].first is the only use of a malloc result, we can delete its | |||
207 | // chain of computation and the store to the global in Dead[n].second. | |||
208 | SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; | |||
209 | ||||
210 | // Constants can't be pointers to dynamically allocated memory. | |||
211 | for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); | |||
212 | UI != E;) { | |||
213 | User *U = *UI++; | |||
214 | if (StoreInst *SI = dyn_cast<StoreInst>(U)) { | |||
215 | Value *V = SI->getValueOperand(); | |||
216 | if (isa<Constant>(V)) { | |||
217 | Changed = true; | |||
218 | SI->eraseFromParent(); | |||
219 | } else if (Instruction *I = dyn_cast<Instruction>(V)) { | |||
220 | if (I->hasOneUse()) | |||
221 | Dead.push_back(std::make_pair(I, SI)); | |||
222 | } | |||
223 | } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { | |||
224 | if (isa<Constant>(MSI->getValue())) { | |||
225 | Changed = true; | |||
226 | MSI->eraseFromParent(); | |||
227 | } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { | |||
228 | if (I->hasOneUse()) | |||
229 | Dead.push_back(std::make_pair(I, MSI)); | |||
230 | } | |||
231 | } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { | |||
232 | GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); | |||
233 | if (MemSrc && MemSrc->isConstant()) { | |||
234 | Changed = true; | |||
235 | MTI->eraseFromParent(); | |||
236 | } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) { | |||
237 | if (I->hasOneUse()) | |||
238 | Dead.push_back(std::make_pair(I, MTI)); | |||
239 | } | |||
240 | } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { | |||
241 | if (CE->use_empty()) { | |||
242 | CE->destroyConstant(); | |||
243 | Changed = true; | |||
244 | } | |||
245 | } else if (Constant *C = dyn_cast<Constant>(U)) { | |||
246 | if (isSafeToDestroyConstant(C)) { | |||
247 | C->destroyConstant(); | |||
248 | // This could have invalidated UI, start over from scratch. | |||
249 | Dead.clear(); | |||
250 | CleanupPointerRootUsers(GV, GetTLI); | |||
251 | return true; | |||
252 | } | |||
253 | } | |||
254 | } | |||
255 | ||||
256 | for (int i = 0, e = Dead.size(); i != e; ++i) { | |||
257 | if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { | |||
258 | Dead[i].second->eraseFromParent(); | |||
259 | Instruction *I = Dead[i].first; | |||
260 | do { | |||
261 | if (isAllocationFn(I, GetTLI)) | |||
262 | break; | |||
263 | Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); | |||
264 | if (!J) | |||
265 | break; | |||
266 | I->eraseFromParent(); | |||
267 | I = J; | |||
268 | } while (true); | |||
269 | I->eraseFromParent(); | |||
270 | Changed = true; | |||
271 | } | |||
272 | } | |||
273 | ||||
274 | return Changed; | |||
275 | } | |||
276 | ||||
277 | /// We just marked GV constant. Loop over all users of the global, cleaning up | |||
278 | /// the obvious ones. This is largely just a quick scan over the use list to | |||
279 | /// clean up the easy and obvious cruft. This returns true if it made a change. | |||
280 | static bool CleanupConstantGlobalUsers( | |||
281 | Value *V, Constant *Init, const DataLayout &DL, | |||
282 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
283 | bool Changed = false; | |||
284 | // Note that we need to use a weak value handle for the worklist items. When | |||
285 | // we delete a constant array, we may also be holding pointer to one of its | |||
286 | // elements (or an element of one of its elements if we're dealing with an | |||
287 | // array of arrays) in the worklist. | |||
288 | SmallVector<WeakTrackingVH, 8> WorkList(V->users()); | |||
289 | while (!WorkList.empty()) { | |||
290 | Value *UV = WorkList.pop_back_val(); | |||
291 | if (!UV) | |||
292 | continue; | |||
293 | ||||
294 | User *U = cast<User>(UV); | |||
295 | ||||
296 | if (LoadInst *LI = dyn_cast<LoadInst>(U)) { | |||
297 | if (Init) { | |||
298 | if (auto *Casted = | |||
299 | ConstantFoldLoadThroughBitcast(Init, LI->getType(), DL)) { | |||
300 | // Replace the load with the initializer. | |||
301 | LI->replaceAllUsesWith(Casted); | |||
302 | LI->eraseFromParent(); | |||
303 | Changed = true; | |||
304 | } | |||
305 | } | |||
306 | } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { | |||
307 | // Store must be unreachable or storing Init into the global. | |||
308 | SI->eraseFromParent(); | |||
309 | Changed = true; | |||
310 | } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { | |||
311 | if (CE->getOpcode() == Instruction::GetElementPtr) { | |||
312 | Constant *SubInit = nullptr; | |||
313 | if (Init) | |||
314 | SubInit = ConstantFoldLoadThroughGEPConstantExpr( | |||
315 | Init, CE, V->getType()->getPointerElementType(), DL); | |||
316 | Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI); | |||
317 | } else if ((CE->getOpcode() == Instruction::BitCast && | |||
318 | CE->getType()->isPointerTy()) || | |||
319 | CE->getOpcode() == Instruction::AddrSpaceCast) { | |||
320 | // Pointer cast, delete any stores and memsets to the global. | |||
321 | Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI); | |||
322 | } | |||
323 | ||||
324 | if (CE->use_empty()) { | |||
325 | CE->destroyConstant(); | |||
326 | Changed = true; | |||
327 | } | |||
328 | } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { | |||
329 | // Do not transform "gepinst (gep constexpr (GV))" here, because forming | |||
330 | // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold | |||
331 | // and will invalidate our notion of what Init is. | |||
332 | Constant *SubInit = nullptr; | |||
333 | if (!isa<ConstantExpr>(GEP->getOperand(0))) { | |||
334 | ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( | |||
335 | ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction()))); | |||
336 | if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) | |||
337 | SubInit = ConstantFoldLoadThroughGEPConstantExpr( | |||
338 | Init, CE, V->getType()->getPointerElementType(), DL); | |||
339 | ||||
340 | // If the initializer is an all-null value and we have an inbounds GEP, | |||
341 | // we already know what the result of any load from that GEP is. | |||
342 | // TODO: Handle splats. | |||
343 | if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) | |||
344 | SubInit = Constant::getNullValue(GEP->getResultElementType()); | |||
345 | } | |||
346 | Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI); | |||
347 | ||||
348 | if (GEP->use_empty()) { | |||
349 | GEP->eraseFromParent(); | |||
350 | Changed = true; | |||
351 | } | |||
352 | } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv | |||
353 | if (MI->getRawDest() == V) { | |||
354 | MI->eraseFromParent(); | |||
355 | Changed = true; | |||
356 | } | |||
357 | ||||
358 | } else if (Constant *C = dyn_cast<Constant>(U)) { | |||
359 | // If we have a chain of dead constantexprs or other things dangling from | |||
360 | // us, and if they are all dead, nuke them without remorse. | |||
361 | if (isSafeToDestroyConstant(C)) { | |||
362 | C->destroyConstant(); | |||
363 | CleanupConstantGlobalUsers(V, Init, DL, GetTLI); | |||
364 | return true; | |||
365 | } | |||
366 | } | |||
367 | } | |||
368 | return Changed; | |||
369 | } | |||
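| // For a now-constant "@g = global i32 42", a "load i32, i32* @g" user is | |||
| // folded to 42 and deleted, and any store into @g is removed as unreachable | |||
| // or redundant. | |||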
370 | ||||
371 | static bool isSafeSROAElementUse(Value *V); | |||
372 | ||||
373 | /// Return true if the specified GEP is a safe user of a derived | |||
374 | /// expression from a global that we want to SROA. | |||
375 | static bool isSafeSROAGEP(User *U) { | |||
376 | // Check to see if this ConstantExpr GEP is SRA'able. In particular, we | |||
377 | // don't like < 3 operand CE's, and we don't like non-constant integer | |||
378 | // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some | |||
379 | // value of C. | |||
380 | if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || | |||
381 | !cast<Constant>(U->getOperand(1))->isNullValue()) | |||
382 | return false; | |||
383 | ||||
384 | gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); | |||
385 | ++GEPI; // Skip over the pointer index. | |||
386 | ||||
387 | // For all other levels we require that the indices are constant and in range. | |||
388 | // In particular, consider: A[0][i]. We cannot know that the user isn't doing | |||
389 | // invalid things like allowing i to index an out-of-range subscript that | |||
390 | // accesses A[1]. This can also happen between different members of a struct | |||
391 | // in llvm IR. | |||
392 | for (; GEPI != E; ++GEPI) { | |||
393 | if (GEPI.isStruct()) | |||
394 | continue; | |||
395 | ||||
396 | ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); | |||
397 | if (!IdxVal || (GEPI.isBoundedSequential() && | |||
398 | IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) | |||
399 | return false; | |||
400 | } | |||
401 | ||||
402 | return llvm::all_of(U->users(), | |||
403 | [](User *UU) { return isSafeSROAElementUse(UU); }); | |||
404 | } | |||
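| // Concretely, "getelementptr @G, 0, 2, ..." with constant, in-range trailing | |||
| // indices is SRA-safe; "getelementptr @G, 0, %i" with a variable index is not. | |||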
405 | ||||
406 | /// Return true if the specified instruction is a safe user of a derived | |||
407 | /// expression from a global that we want to SROA. | |||
408 | static bool isSafeSROAElementUse(Value *V) { | |||
409 | // We might have a dead and dangling constant hanging off of here. | |||
410 | if (Constant *C = dyn_cast<Constant>(V)) | |||
411 | return isSafeToDestroyConstant(C); | |||
412 | ||||
413 | Instruction *I = dyn_cast<Instruction>(V); | |||
414 | if (!I) return false; | |||
415 | ||||
416 | // Loads are ok. | |||
417 | if (isa<LoadInst>(I)) return true; | |||
418 | ||||
419 | // Stores *to* the pointer are ok. | |||
420 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) | |||
421 | return SI->getOperand(0) != V; | |||
422 | ||||
423 | // Otherwise, it must be a GEP. Check it and its users are safe to SRA. | |||
424 | return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I); | |||
425 | } | |||
426 | ||||
427 | /// Look at all uses of the global and decide whether it is safe for us to | |||
428 | /// perform this transformation. | |||
429 | static bool GlobalUsersSafeToSRA(GlobalValue *GV) { | |||
430 | for (User *U : GV->users()) { | |||
431 | // The user of the global must be a GEP Inst or a ConstantExpr GEP. | |||
432 | if (!isa<GetElementPtrInst>(U) && | |||
433 | (!isa<ConstantExpr>(U) || | |||
434 | cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) | |||
435 | return false; | |||
436 | ||||
437 | // Check that the GEP and its users are safe to SRA. | |||
438 | if (!isSafeSROAGEP(U)) | |||
439 | return false; | |||
440 | } | |||
441 | ||||
442 | return true; | |||
443 | } | |||
444 | ||||
445 | static bool IsSRASequential(Type *T) { | |||
446 | return isa<ArrayType>(T) || isa<VectorType>(T); | |||
447 | } | |||
448 | static uint64_t GetSRASequentialNumElements(Type *T) { | |||
449 | if (ArrayType *AT = dyn_cast<ArrayType>(T)) | |||
450 | return AT->getNumElements(); | |||
451 | return cast<FixedVectorType>(T)->getNumElements(); | |||
452 | } | |||
453 | static Type *GetSRASequentialElementType(Type *T) { | |||
454 | if (ArrayType *AT = dyn_cast<ArrayType>(T)) | |||
455 | return AT->getElementType(); | |||
456 | return cast<VectorType>(T)->getElementType(); | |||
457 | } | |||
458 | static bool CanDoGlobalSRA(GlobalVariable *GV) { | |||
459 | Constant *Init = GV->getInitializer(); | |||
460 | ||||
461 | if (isa<StructType>(Init->getType())) { | |||
462 | // nothing to check | |||
463 | } else if (IsSRASequential(Init->getType())) { | |||
464 | if (GetSRASequentialNumElements(Init->getType()) > 16 && | |||
465 | GV->hasNUsesOrMore(16)) | |||
466 | return false; // It's not worth it. | |||
467 | } else | |||
468 | return false; | |||
469 | ||||
470 | return GlobalUsersSafeToSRA(GV); | |||
471 | } | |||
472 | ||||
473 | /// Copy over the debug info for a variable to its SRA replacements. | |||
474 | static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, | |||
475 | uint64_t FragmentOffsetInBits, | |||
476 | uint64_t FragmentSizeInBits, | |||
477 | uint64_t VarSize) { | |||
478 | SmallVector<DIGlobalVariableExpression *, 1> GVs; | |||
479 | GV->getDebugInfo(GVs); | |||
480 | for (auto *GVE : GVs) { | |||
481 | DIVariable *Var = GVE->getVariable(); | |||
482 | DIExpression *Expr = GVE->getExpression(); | |||
483 | // If the FragmentSize is smaller than the variable, | |||
484 | // emit a fragment expression. | |||
485 | if (FragmentSizeInBits < VarSize) { | |||
486 | if (auto E = DIExpression::createFragmentExpression( | |||
487 | Expr, FragmentOffsetInBits, FragmentSizeInBits)) | |||
488 | Expr = *E; | |||
489 | else | |||
490 | return; | |||
491 | } | |||
492 | auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); | |||
493 | NGV->addDebugInfo(NGVE); | |||
494 | } | |||
495 | } | |||
496 | ||||
497 | /// Perform scalar replacement of aggregates on the specified global variable. | |||
498 | /// This opens the door for other optimizations by exposing the behavior of the | |||
499 | /// program in a more fine-grained way. We have determined that this | |||
500 | /// transformation is safe already. We return the first global variable we | |||
501 | /// insert so that the caller can reprocess it. | |||
502 | static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { | |||
503 | // Make sure this global only has simple uses that we can SRA. | |||
504 | if (!CanDoGlobalSRA(GV)) | |||
505 | return nullptr; | |||
506 | ||||
507 | assert(GV->hasLocalLinkage()); | |||
508 | Constant *Init = GV->getInitializer(); | |||
509 | Type *Ty = Init->getType(); | |||
510 | uint64_t VarSize = DL.getTypeSizeInBits(Ty); | |||
511 | ||||
512 | std::map<unsigned, GlobalVariable *> NewGlobals; | |||
513 | ||||
514 | // Get the alignment of the global, either explicit or target-specific. | |||
515 | Align StartAlignment = | |||
516 | DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType()); | |||
517 | ||||
518 | // Loop over all users and create replacement variables for used aggregate | |||
519 | // elements. | |||
520 | for (User *GEP : GV->users()) { | |||
521 | assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() == | |||
522 | Instruction::GetElementPtr) || | |||
523 | isa<GetElementPtrInst>(GEP)) && | |||
524 | "NonGEP CE's are not SRAable!"); | |||
525 | ||||
526 | // Ignore operand 1 (the first index), which has to be zero or else the program | |||
527 | // is quite broken (undefined). Get operand 2, which is the structure or array | |||
528 | // index. | |||
529 | unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); | |||
530 | if (NewGlobals.count(ElementIdx) == 1) | |||
531 | continue; // we've already created a replacement variable | |||
532 | assert(NewGlobals.count(ElementIdx) == 0); | |||
533 | ||||
534 | Type *ElTy = nullptr; | |||
535 | if (StructType *STy = dyn_cast<StructType>(Ty)) | |||
536 | ElTy = STy->getElementType(ElementIdx); | |||
537 | else | |||
538 | ElTy = GetSRASequentialElementType(Ty); | |||
539 | assert(ElTy); | |||
540 | ||||
541 | Constant *In = Init->getAggregateElement(ElementIdx); | |||
542 | assert(In && "Couldn't get element of initializer?"); | |||
543 | ||||
544 | GlobalVariable *NGV = new GlobalVariable( | |||
545 | ElTy, false, GlobalVariable::InternalLinkage, In, | |||
546 | GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(), | |||
547 | GV->getType()->getAddressSpace()); | |||
548 | NGV->setExternallyInitialized(GV->isExternallyInitialized()); | |||
549 | NGV->copyAttributesFrom(GV); | |||
550 | NewGlobals.insert(std::make_pair(ElementIdx, NGV)); | |||
551 | ||||
552 | if (StructType *STy = dyn_cast<StructType>(Ty)) { | |||
553 | const StructLayout &Layout = *DL.getStructLayout(STy); | |||
554 | ||||
555 | // Calculate the known alignment of the field. If the original aggregate | |||
556 | // had 256 byte alignment for example, something might depend on that: | |||
557 | // propagate info to each field. | |||
558 | uint64_t FieldOffset = Layout.getElementOffset(ElementIdx); | |||
559 | Align NewAlign = commonAlignment(StartAlignment, FieldOffset); | |||
560 | if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx))) | |||
561 | NGV->setAlignment(NewAlign); | |||
562 | ||||
563 | // Copy over the debug info for the variable. | |||
564 | uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); | |||
565 | uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx); | |||
566 | transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize); | |||
567 | } else { | |||
568 | uint64_t EltSize = DL.getTypeAllocSize(ElTy); | |||
569 | Align EltAlign = DL.getABITypeAlign(ElTy); | |||
570 | uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); | |||
571 | ||||
572 | // Calculate the known alignment of the field. If the original aggregate | |||
573 | // had 256 byte alignment for example, something might depend on that: | |||
574 | // propagate info to each field. | |||
575 | Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx); | |||
576 | if (NewAlign > EltAlign) | |||
577 | NGV->setAlignment(NewAlign); | |||
578 | transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx, | |||
579 | FragmentSizeInBits, VarSize); | |||
580 | } | |||
581 | } | |||
582 | ||||
583 | if (NewGlobals.empty()) | |||
584 | return nullptr; | |||
585 | ||||
586 | Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); | |||
587 | for (auto NewGlobalVar : NewGlobals) | |||
588 | Globals.push_back(NewGlobalVar.second); | |||
589 | ||||
590 | LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n")do { } while (false); | |||
591 | ||||
592 | Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext())); | |||
593 | ||||
594 | // Loop over all of the uses of the global, replacing the constantexpr geps, | |||
595 | // with smaller constantexpr geps or direct references. | |||
596 | while (!GV->use_empty()) { | |||
597 | User *GEP = GV->user_back(); | |||
598 | assert(((isa<ConstantExpr>(GEP) && | |||
599 | cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| | |||
600 | isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); | |||
601 | ||||
602 | // Ignore operand 1 (the first index), which has to be zero or else the program | |||
603 | // is quite broken (undefined). Get operand 2, which is the structure or array | |||
604 | // index. | |||
605 | unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); | |||
606 | assert(NewGlobals.count(ElementIdx) == 1); | |||
607 | ||||
608 | Value *NewPtr = NewGlobals[ElementIdx]; | |||
609 | Type *NewTy = NewGlobals[ElementIdx]->getValueType(); | |||
610 | ||||
611 | // Form a shorter GEP if needed. | |||
612 | if (GEP->getNumOperands() > 3) { | |||
613 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { | |||
614 | SmallVector<Constant*, 8> Idxs; | |||
615 | Idxs.push_back(NullInt); | |||
616 | for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) | |||
617 | Idxs.push_back(CE->getOperand(i)); | |||
618 | NewPtr = | |||
619 | ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); | |||
620 | } else { | |||
621 | GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); | |||
622 | SmallVector<Value*, 8> Idxs; | |||
623 | Idxs.push_back(NullInt); | |||
624 | for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) | |||
625 | Idxs.push_back(GEPI->getOperand(i)); | |||
626 | NewPtr = GetElementPtrInst::Create( | |||
627 | NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx), | |||
628 | GEPI); | |||
629 | } | |||
630 | } | |||
631 | GEP->replaceAllUsesWith(NewPtr); | |||
632 | ||||
633 | // We changed the pointer of any memory access user. Recalculate alignments. | |||
634 | for (User *U : NewPtr->users()) { | |||
635 | if (auto *Load = dyn_cast<LoadInst>(U)) { | |||
636 | Align PrefAlign = DL.getPrefTypeAlign(Load->getType()); | |||
637 | Align NewAlign = getOrEnforceKnownAlignment(Load->getPointerOperand(), | |||
638 | PrefAlign, DL, Load); | |||
639 | Load->setAlignment(NewAlign); | |||
640 | } | |||
641 | if (auto *Store = dyn_cast<StoreInst>(U)) { | |||
642 | Align PrefAlign = | |||
643 | DL.getPrefTypeAlign(Store->getValueOperand()->getType()); | |||
644 | Align NewAlign = getOrEnforceKnownAlignment(Store->getPointerOperand(), | |||
645 | PrefAlign, DL, Store); | |||
646 | Store->setAlignment(NewAlign); | |||
647 | } | |||
648 | } | |||
649 | ||||
650 | if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) | |||
651 | GEPI->eraseFromParent(); | |||
652 | else | |||
653 | cast<ConstantExpr>(GEP)->destroyConstant(); | |||
654 | } | |||
655 | ||||
656 | // Delete the old global, now that it is dead. | |||
657 | Globals.erase(GV); | |||
658 | ++NumSRA; | |||
659 | ||||
660 | assert(NewGlobals.size() > 0); | |||
661 | return NewGlobals.begin()->second; | |||
662 | } | |||
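| // After SRA a global such as "@G = internal global { i32, i8 }" is split | |||
| // into "@G.0" (i32) and "@G.1" (i8), and each "gep @G, 0, N, ..." user is | |||
| // rewritten to point at the corresponding new global. | |||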
663 | ||||
664 | /// Return true if all users of the specified value will trap if the value is | |||
665 | /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid | |||
666 | /// reprocessing them. | |||
667 | static bool AllUsesOfValueWillTrapIfNull(const Value *V, | |||
668 | SmallPtrSetImpl<const PHINode*> &PHIs) { | |||
669 | for (const User *U : V->users()) { | |||
670 | if (const Instruction *I = dyn_cast<Instruction>(U)) { | |||
671 | // If null pointer is considered valid, then all uses are non-trapping. | |||
672 | // Non address-space 0 globals have already been pruned by the caller. | |||
673 | if (NullPointerIsDefined(I->getFunction())) | |||
674 | return false; | |||
675 | } | |||
676 | if (isa<LoadInst>(U)) { | |||
677 | // Will trap. | |||
678 | } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { | |||
679 | if (SI->getOperand(0) == V) { | |||
680 | //cerr << "NONTRAPPING USE: " << *U; | |||
681 | return false; // Storing the value. | |||
682 | } | |||
683 | } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { | |||
684 | if (CI->getCalledOperand() != V) { | |||
685 | //cerr << "NONTRAPPING USE: " << *U; | |||
686 | return false; // Not calling the ptr | |||
687 | } | |||
688 | } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { | |||
689 | if (II->getCalledOperand() != V) { | |||
690 | //cerr << "NONTRAPPING USE: " << *U; | |||
691 | return false; // Not calling the ptr | |||
692 | } | |||
693 | } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { | |||
694 | if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; | |||
695 | } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { | |||
696 | if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; | |||
697 | } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { | |||
698 | // If we've already seen this phi node, ignore it, it has already been | |||
699 | // checked. | |||
700 | if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) | |||
701 | return false; | |||
702 | } else if (isa<ICmpInst>(U) && | |||
703 | !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) && | |||
704 | isa<LoadInst>(U->getOperand(0)) && | |||
705 | isa<ConstantPointerNull>(U->getOperand(1))) { | |||
706 | assert(isa<GlobalValue>( | |||
707 | cast<LoadInst>(U->getOperand(0))->getPointerOperand()) && | |||
708 | "Should be GlobalVariable"); | |||
709 | // This and only this kind of non-signed ICmpInst is to be replaced with | |||
710 | // the comparing of the value of the created global init bool later in | |||
711 | // optimizeGlobalAddressOfMalloc for the global variable. | |||
712 | } else { | |||
713 | //cerr << "NONTRAPPING USE: " << *U; | |||
714 | return false; | |||
715 | } | |||
716 | } | |||
717 | return true; | |||
718 | } | |||
719 | ||||
720 | /// Return true if all uses of any loads from GV will trap if the loaded value | |||
721 | /// is null. Note that this also permits comparisons of the loaded value | |||
722 | /// against null, as a special case. | |||
723 | static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { | |||
724 | for (const User *U : GV->users()) | |||
725 | if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { | |||
726 | SmallPtrSet<const PHINode*, 8> PHIs; | |||
727 | if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) | |||
728 | return false; | |||
729 | } else if (isa<StoreInst>(U)) { | |||
730 | // Ignore stores to the global. | |||
731 | } else { | |||
732 | // We don't know or understand this user, bail out. | |||
733 | //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; | |||
734 | return false; | |||
735 | } | |||
736 | return true; | |||
737 | } | |||
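| // E.g. a loaded pointer used only as a call target or as a load/store | |||
| // address will trap if null, so it qualifies; a use that stores the loaded | |||
| // pointer somewhere else disqualifies the global. | |||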
738 | ||||
739 | static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { | |||
740 | bool Changed = false; | |||
741 | for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) { | |||
742 | Instruction *I = cast<Instruction>(*UI++); | |||
743 | // Uses are non-trapping if null pointer is considered valid. | |||
744 | // Non address-space 0 globals are already pruned by the caller. | |||
745 | if (NullPointerIsDefined(I->getFunction())) | |||
746 | return false; | |||
747 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |||
748 | LI->setOperand(0, NewV); | |||
749 | Changed = true; | |||
750 | } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { | |||
751 | if (SI->getOperand(1) == V) { | |||
752 | SI->setOperand(1, NewV); | |||
753 | Changed = true; | |||
754 | } | |||
755 | } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { | |||
756 | CallBase *CB = cast<CallBase>(I); | |||
757 | if (CB->getCalledOperand() == V) { | |||
758 | // Calling through the pointer! Turn into a direct call, but be careful | |||
759 | // that the pointer is not also being passed as an argument. | |||
760 | CB->setCalledOperand(NewV); | |||
761 | Changed = true; | |||
762 | bool PassedAsArg = false; | |||
763 | for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) | |||
764 | if (CB->getArgOperand(i) == V) { | |||
765 | PassedAsArg = true; | |||
766 | CB->setArgOperand(i, NewV); | |||
767 | } | |||
768 | ||||
769 | if (PassedAsArg) { | |||
770 | // Being passed as an argument also. Be careful to not invalidate UI! | |||
771 | UI = V->user_begin(); | |||
772 | } | |||
773 | } | |||
774 | } else if (CastInst *CI = dyn_cast<CastInst>(I)) { | |||
775 | Changed |= OptimizeAwayTrappingUsesOfValue(CI, | |||
776 | ConstantExpr::getCast(CI->getOpcode(), | |||
777 | NewV, CI->getType())); | |||
778 | if (CI->use_empty()) { | |||
779 | Changed = true; | |||
780 | CI->eraseFromParent(); | |||
781 | } | |||
782 | } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { | |||
783 | // Should handle GEP here. | |||
784 | SmallVector<Constant*, 8> Idxs; | |||
785 | Idxs.reserve(GEPI->getNumOperands()-1); | |||
786 | for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); | |||
787 | i != e; ++i) | |||
788 | if (Constant *C = dyn_cast<Constant>(*i)) | |||
789 | Idxs.push_back(C); | |||
790 | else | |||
791 | break; | |||
792 | if (Idxs.size() == GEPI->getNumOperands()-1) | |||
793 | Changed |= OptimizeAwayTrappingUsesOfValue( | |||
794 | GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(), | |||
795 | NewV, Idxs)); | |||
796 | if (GEPI->use_empty()) { | |||
797 | Changed = true; | |||
798 | GEPI->eraseFromParent(); | |||
799 | } | |||
800 | } | |||
801 | } | |||
802 | ||||
803 | return Changed; | |||
804 | } | |||
805 | ||||
806 | /// The specified global has only one non-null value stored into it. If there | |||
807 | /// are uses of the loaded value that would trap if the loaded value is | |||
808 | /// dynamically null, then we know that they cannot be reachable with a null | |||
809 | /// value, so we can optimize away the load. | |||
810 | static bool OptimizeAwayTrappingUsesOfLoads( | |||
811 | GlobalVariable *GV, Constant *LV, const DataLayout &DL, | |||
812 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
813 | bool Changed = false; | |||
814 | ||||
815 | // Keep track of whether we are able to remove all the uses of the global | |||
816 | // other than the store that defines it. | |||
817 | bool AllNonStoreUsesGone = true; | |||
818 | ||||
819 | // Replace all uses of loads with uses of uses of the stored value. | |||
820 | for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){ | |||
821 | User *GlobalUser = *GUI++; | |||
822 | if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { | |||
823 | Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); | |||
824 | // If we were able to delete all uses of the loads | |||
825 | if (LI->use_empty()) { | |||
826 | LI->eraseFromParent(); | |||
827 | Changed = true; | |||
828 | } else { | |||
829 | AllNonStoreUsesGone = false; | |||
830 | } | |||
831 | } else if (isa<StoreInst>(GlobalUser)) { | |||
832 | // Ignore the store that stores "LV" to the global. | |||
833 | assert(GlobalUser->getOperand(1) == GV && | |||
834 | "Must be storing *to* the global"); | |||
835 | } else { | |||
836 | AllNonStoreUsesGone = false; | |||
837 | ||||
838 | // If we get here we could have other crazy uses that are transitively | |||
839 | // loaded. | |||
840 | assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || | |||
841 | isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || | |||
842 | isa<BitCastInst>(GlobalUser) || | |||
843 | isa<GetElementPtrInst>(GlobalUser)) && | |||
844 | "Only expect load and stores!"); | |||
845 | } | |||
846 | } | |||
847 | ||||
848 | if (Changed) { | |||
849 | LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GVdo { } while (false) | |||
850 | << "\n")do { } while (false); | |||
851 | ++NumGlobUses; | |||
852 | } | |||
853 | ||||
854 | // If we nuked all of the loads, then none of the stores are needed either, | |||
855 | // nor is the global. | |||
856 | if (AllNonStoreUsesGone) { | |||
857 | if (isLeakCheckerRoot(GV)) { | |||
858 | Changed |= CleanupPointerRootUsers(GV, GetTLI); | |||
859 | } else { | |||
860 | Changed = true; | |||
861 | CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI); | |||
862 | } | |||
863 | if (GV->use_empty()) { | |||
864 | LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n")do { } while (false); | |||
865 | Changed = true; | |||
866 | GV->eraseFromParent(); | |||
867 | ++NumDeleted; | |||
868 | } | |||
869 | } | |||
870 | return Changed; | |||
871 | } | |||
872 | ||||
873 | /// Walk the use list of V, constant folding all of the instructions that are | |||
874 | /// foldable. | |||
875 | static void ConstantPropUsersOf(Value *V, const DataLayout &DL, | |||
876 | TargetLibraryInfo *TLI) { | |||
877 | for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) | |||
878 | if (Instruction *I = dyn_cast<Instruction>(*UI++)) | |||
879 | if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) { | |||
880 | I->replaceAllUsesWith(NewC); | |||
881 | ||||
882 | // Advance UI to the next non-I use to avoid invalidating it! | |||
883 | // Instructions could multiply use V. | |||
884 | while (UI != E && *UI == I) | |||
885 | ++UI; | |||
886 | if (isInstructionTriviallyDead(I, TLI)) | |||
887 | I->eraseFromParent(); | |||
888 | } | |||
889 | } | |||
890 | ||||
891 | /// This function takes the specified global variable, and transforms the | |||
892 | /// program as if it always contained the result of the specified malloc. | |||
893 | /// Because it is always the result of the specified malloc, there is no reason | |||
894 | /// to actually DO the malloc. Instead, turn the malloc into a global, and | |||
895 | /// rewrite any loads of GV as uses of the new global. | |||
896 | static GlobalVariable * | |||
897 | OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, | |||
898 | ConstantInt *NElements, const DataLayout &DL, | |||
899 | TargetLibraryInfo *TLI) { | |||
900 | LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI | |||
901 | << '\n'); | |||
902 | ||||
903 | Type *GlobalType; | |||
904 | if (NElements->getZExtValue() == 1) | |||
905 | GlobalType = AllocTy; | |||
906 | else | |||
907 | // If we have an array allocation, the global variable is of an array. | |||
908 | GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); | |||
909 | ||||
910 | // Create the new global variable. The contents of the malloc'd memory are | |||
911 | // undefined, so initialize it with an undef value. | |||
912 | GlobalVariable *NewGV = new GlobalVariable( | |||
913 | *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, | |||
914 | UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, | |||
915 | GV->getThreadLocalMode()); | |||
916 | ||||
917 | // If there are bitcast users of the malloc (which is typical, usually we have | |||
918 | // a malloc + bitcast) then replace them with uses of the new global. Update | |||
919 | // other users to use the global as well. | |||
920 | BitCastInst *TheBC = nullptr; | |||
921 | while (!CI->use_empty()) { | |||
922 | Instruction *User = cast<Instruction>(CI->user_back()); | |||
923 | if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { | |||
924 | if (BCI->getType() == NewGV->getType()) { | |||
925 | BCI->replaceAllUsesWith(NewGV); | |||
926 | BCI->eraseFromParent(); | |||
927 | } else { | |||
928 | BCI->setOperand(0, NewGV); | |||
929 | } | |||
930 | } else { | |||
931 | if (!TheBC) | |||
932 | TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); | |||
933 | User->replaceUsesOfWith(CI, TheBC); | |||
934 | } | |||
935 | } | |||
936 | ||||
937 | Constant *RepValue = NewGV; | |||
938 | if (NewGV->getType() != GV->getValueType()) | |||
939 | RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType()); | |||
940 | ||||
941 | // If there is a comparison against null, we will insert a global bool to | |||
942 | // keep track of whether the global was initialized yet or not. | |||
943 | GlobalVariable *InitBool = | |||
944 | new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, | |||
945 | GlobalValue::InternalLinkage, | |||
946 | ConstantInt::getFalse(GV->getContext()), | |||
947 | GV->getName()+".init", GV->getThreadLocalMode()); | |||
948 | bool InitBoolUsed = false; | |||
949 | ||||
950 | // Loop over all uses of GV, processing them in turn. | |||
951 | while (!GV->use_empty()) { | |||
952 | if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { | |||
953 | // The global is initialized when the store to it occurs. If the stored | |||
954 | // value is null value, the global bool is set to false, otherwise true. | |||
955 | new StoreInst(ConstantInt::getBool( | |||
956 | GV->getContext(), | |||
957 | !isa<ConstantPointerNull>(SI->getValueOperand())), | |||
958 | InitBool, false, Align(1), SI->getOrdering(), | |||
959 | SI->getSyncScopeID(), SI); | |||
960 | SI->eraseFromParent(); | |||
961 | continue; | |||
962 | } | |||
963 | ||||
964 | LoadInst *LI = cast<LoadInst>(GV->user_back()); | |||
965 | while (!LI->use_empty()) { | |||
966 | Use &LoadUse = *LI->use_begin(); | |||
967 | ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); | |||
968 | if (!ICI) { | |||
969 | LoadUse = RepValue; | |||
970 | continue; | |||
971 | } | |||
972 | ||||
973 | // Replace the cmp X, 0 with a use of the bool value. | |||
974 | Value *LV = new LoadInst(InitBool->getValueType(), InitBool, | |||
975 | InitBool->getName() + ".val", false, Align(1), | |||
976 | LI->getOrdering(), LI->getSyncScopeID(), LI); | |||
977 | InitBoolUsed = true; | |||
978 | switch (ICI->getPredicate()) { | |||
979 | default: llvm_unreachable("Unknown ICmp Predicate!"); | |||
980 | case ICmpInst::ICMP_ULT: // X < null -> always false | |||
981 | LV = ConstantInt::getFalse(GV->getContext()); | |||
982 | break; | |||
983 | case ICmpInst::ICMP_UGE: // X >= null -> always true | |||
984 | LV = ConstantInt::getTrue(GV->getContext()); | |||
985 | break; | |||
986 | case ICmpInst::ICMP_ULE: | |||
987 | case ICmpInst::ICMP_EQ: | |||
988 | LV = BinaryOperator::CreateNot(LV, "notinit", ICI); | |||
989 | break; | |||
990 | case ICmpInst::ICMP_NE: | |||
991 | case ICmpInst::ICMP_UGT: | |||
992 | break; // no change. | |||
993 | } | |||
994 | ICI->replaceAllUsesWith(LV); | |||
995 | ICI->eraseFromParent(); | |||
996 | } | |||
997 | LI->eraseFromParent(); | |||
998 | } | |||
999 | ||||
1000 | // If the initialization boolean was used, insert it, otherwise delete it. | |||
1001 | if (!InitBoolUsed) { | |||
1002 | while (!InitBool->use_empty()) // Delete initializations | |||
1003 | cast<StoreInst>(InitBool->user_back())->eraseFromParent(); | |||
1004 | delete InitBool; | |||
1005 | } else | |||
1006 | GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); | |||
1007 | ||||
1008 | // Now the GV is dead, nuke it and the malloc.. | |||
1009 | GV->eraseFromParent(); | |||
1010 | CI->eraseFromParent(); | |||
1011 | ||||
1012 | // To further other optimizations, loop over all users of NewGV and try to | |||
1013 | // constant prop them. This will promote GEP instructions with constant | |||
1014 | // indices into GEP constant-exprs, which will allow global-opt to hack on it. | |||
1015 | ConstantPropUsersOf(NewGV, DL, TLI); | |||
1016 | if (RepValue != NewGV) | |||
1017 | ConstantPropUsersOf(RepValue, DL, TLI); | |||
1018 | ||||
1019 | return NewGV; | |||
1020 | } | |||
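| // Sketch of the effect: for "@g = internal global i32* null" whose only | |||
| // store is "store (malloc of one i32), @g", the malloc'd memory becomes | |||
| // "@g.body = internal global i32 undef", loads of @g are replaced by @g.body, | |||
| // and null-comparisons of the load are answered by the "@g.init" i1 flag. | |||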
1021 | ||||
1022 | /// Scan the use-list of V checking to make sure that there are no complex uses | |||
1023 | /// of V. We permit simple things like dereferencing the pointer, but not | |||
1024 | /// storing through the address, unless it is to the specified global. | |||
1025 | static bool | |||
1026 | valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, | |||
1027 | const GlobalVariable *GV) { | |||
1028 | for (const User *U : V->users()) { | |||
1029 | const Instruction *Inst = cast<Instruction>(U); | |||
1030 | ||||
1031 | if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { | |||
1032 | continue; // Fine, ignore. | |||
1033 | } | |||
1034 | ||||
1035 | if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { | |||
1036 | if (SI->getOperand(0) == V && SI->getOperand(1) != GV) | |||
1037 | return false; // Storing the pointer itself... bad. | |||
1038 | continue; // Otherwise, storing through it, or storing into GV... fine. | |||
1039 | } | |||
1040 | ||||
1041 | if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { | |||
1042 | if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV)) | |||
1043 | return false; | |||
1044 | continue; | |||
1045 | } | |||
1046 | ||||
1047 | return false; | |||
1048 | } | |||
1049 | return true; | |||
1050 | } | |||
1051 | ||||
1052 | /// This function is called when we see a pointer global variable with a single | |||
1053 | /// value stored into it that is a malloc or a cast of malloc. | |||
1054 | static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, | |||
1055 | Type *AllocTy, | |||
1056 | AtomicOrdering Ordering, | |||
1057 | const DataLayout &DL, | |||
1058 | TargetLibraryInfo *TLI) { | |||
1059 | // If this is a malloc of an abstract type, don't touch it. | |||
1060 | if (!AllocTy->isSized()) | |||
1061 | return false; | |||
1062 | ||||
1063 | // We can't optimize this global unless all uses of it are *known* to be | |||
1064 | // of the malloc value, not of the null initializer value (consider a use | |||
1065 | // that compares the global's value against zero to see if the malloc has | |||
1066 | // been reached). To do this, we check to see if all uses of the global | |||
1067 | // would trap if the global were null: this proves that they must all | |||
1068 | // happen after the malloc. | |||
1069 | if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) | |||
1070 | return false; | |||
1071 | ||||
1072 | // We can't optimize this if the malloc itself is used in a complex way, | |||
1073 | // for example, being stored into multiple globals. This allows the | |||
1074 | // malloc to be stored into the specified global, and loaded or icmp'd. | |||
1075 | // These are all things we could transform to using the global for. | |||
1076 | if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV)) | |||
1077 | return false; | |||
1078 | ||||
1079 | // If we have a global that is only initialized with a fixed size malloc, | |||
1080 | // transform the program to use global memory instead of malloc'd memory. | |||
1081 | // This eliminates dynamic allocation, avoids an indirection accessing the | |||
1082 | // data, and exposes the resultant global to further GlobalOpt. | |||
1083 | // We cannot optimize the malloc if we cannot determine malloc array size. | |||
1084 | Value *NElems = getMallocArraySize(CI, DL, TLI, true); | |||
1085 | if (!NElems) | |||
1086 | return false; | |||
1087 | ||||
1088 | if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) | |||
1089 | // Restrict this transformation to only working on small allocations | |||
1090 | // (2048 bytes currently), as we don't want to introduce a 16M global or | |||
1091 | // something. | |||
1092 | if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { | |||
1093 | OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); | |||
1094 | return true; | |||
1095 | } | |||
1096 | ||||
1097 | return false; | |||
1098 | } | |||
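| // Note that the array size must fold to a compile-time ConstantInt; a | |||
| // malloc whose size is only known at run time is left untouched. | |||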
1099 | ||||
1100 | // Try to optimize globals based on the knowledge that only one value (besides | |||
1101 | // its initializer) is ever stored to the global. | |||
1102 | static bool | |||
1103 | optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, | |||
1104 | AtomicOrdering Ordering, const DataLayout &DL, | |||
1105 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
1106 | // Ignore no-op GEPs and bitcasts. | |||
1107 | StoredOnceVal = StoredOnceVal->stripPointerCasts(); | |||
1108 | ||||
1109 | // If we are dealing with a pointer global that is initialized to null and | |||
1110 | // only has one (non-null) value stored into it, then we can optimize any | |||
1111 | // users of the loaded value (often calls and loads) that would trap if the | |||
1112 | // value was null. | |||
1113 | if (GV->getInitializer()->getType()->isPointerTy() && | |||
1114 | GV->getInitializer()->isNullValue() && | |||
1115 | !NullPointerIsDefined( | |||
1116 | nullptr /* F */, | |||
1117 | GV->getInitializer()->getType()->getPointerAddressSpace())) { | |||
1118 | if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { | |||
1119 | if (GV->getInitializer()->getType() != SOVC->getType()) | |||
1120 | SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); | |||
1121 | ||||
1122 | // Optimize away any trapping uses of the loaded value. | |||
1123 | if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) | |||
1124 | return true; | |||
1125 | } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) { | |||
1126 | auto *TLI = &GetTLI(*CI->getFunction()); | |||
1127 | Type *MallocType = getMallocAllocatedType(CI, TLI); | |||
1128 | if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, | |||
1129 | Ordering, DL, TLI)) | |||
1130 | return true; | |||
1131 | } | |||
1132 | } | |||
1133 | ||||
1134 | return false; | |||
1135 | } | |||
1136 | ||||
1137 | /// At this point, we have learned that the only two values ever stored into GV | |||
1138 | /// are its initializer and OtherVal. See if we can shrink the global into a | |||
1139 | /// boolean and select between the two values whenever it is used. This exposes | |||
1140 | /// the values to other scalar optimizations. | |||
1141 | static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { | |||
1142 | Type *GVElType = GV->getValueType(); | |||
1143 | ||||
1144 | // If GVElType is already i1, it is already shrunk. If the type of the GV is | |||
1145 | // an FP value, pointer or vector, don't do this optimization because a select | |||
1146 | // between them is very expensive and unlikely to lead to later | |||
1147 | // simplification. In these cases, we typically end up with "cond ? v1 : v2" | |||
1148 | // where v1 and v2 both require constant pool loads, a big loss. | |||
1149 | if (GVElType == Type::getInt1Ty(GV->getContext()) || | |||
1150 | GVElType->isFloatingPointTy() || | |||
1151 | GVElType->isPointerTy() || GVElType->isVectorTy()) | |||
1152 | return false; | |||
1153 | ||||
1154 | // Walk the use list of the global seeing if all the uses are load or store. | |||
1155 | // If there is anything else, bail out. | |||
1156 | for (User *U : GV->users()) | |||
1157 | if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) | |||
1158 | return false; | |||
1159 | ||||
1160 | LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n")do { } while (false); | |||
1161 | ||||
1162 | // Create the new global, initializing it to false. | |||
1163 | GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), | |||
1164 | false, | |||
1165 | GlobalValue::InternalLinkage, | |||
1166 | ConstantInt::getFalse(GV->getContext()), | |||
1167 | GV->getName()+".b", | |||
1168 | GV->getThreadLocalMode(), | |||
1169 | GV->getType()->getAddressSpace()); | |||
1170 | NewGV->copyAttributesFrom(GV); | |||
1171 | GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); | |||
1172 | ||||
1173 | Constant *InitVal = GV->getInitializer(); | |||
1174 | assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && | |||
1175 | "No reason to shrink to bool!"); | |||
1176 | ||||
1177 | SmallVector<DIGlobalVariableExpression *, 1> GVs; | |||
1178 | GV->getDebugInfo(GVs); | |||
1179 | ||||
1180 | // If initialized to zero and storing one into the global, we can use a cast | |||
1181 | // instead of a select to synthesize the desired value. | |||
1182 | bool IsOneZero = false; | |||
1183 | bool EmitOneOrZero = true; | |||
1184 | auto *CI = dyn_cast<ConstantInt>(OtherVal); | |||
1185 | if (CI && CI->getValue().getActiveBits() <= 64) { | |||
1186 | IsOneZero = InitVal->isNullValue() && CI->isOne(); | |||
1187 | ||||
1188 | auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer()); | |||
1189 | if (CIInit && CIInit->getValue().getActiveBits() <= 64) { | |||
1190 | uint64_t ValInit = CIInit->getZExtValue(); | |||
1191 | uint64_t ValOther = CI->getZExtValue(); | |||
1192 | uint64_t ValMinus = ValOther - ValInit; | |||
1193 | ||||
1194 | for (auto *GVe : GVs) { | |||
1195 | DIGlobalVariable *DGV = GVe->getVariable(); | |||
1196 | DIExpression *E = GVe->getExpression(); | |||
1197 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1198 | unsigned SizeInOctets = | |||
1199 | DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8; | |||
1200 | ||||
1201 | // It is expected that the address of the optimized global variable is | |||
1202 | // on top of the DWARF expression stack. After the optimization, the value | |||
1203 | // of that variable will be either 0 (the initial value) or 1 (the other | |||
1204 | // value). The following expression should return the constant integer | |||
1205 | // value depending on the value at the global object's address: | |||
1206 | // val * (ValOther - ValInit) + ValInit: | |||
1207 | // DW_OP_deref DW_OP_constu <ValMinus> | |||
1208 | // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value | |||
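// Worked example (editorial addition): with ValInit == 3 and ValOther == 42,
// ValMinus == 39, so the prepended expression computes val * 39 + 3, which
// yields 3 while the shrunken i1 holds 0 and 42 once it holds 1.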
1209 | SmallVector<uint64_t, 12> Ops = { | |||
1210 | dwarf::DW_OP_deref_size, SizeInOctets, | |||
1211 | dwarf::DW_OP_constu, ValMinus, | |||
1212 | dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit, | |||
1213 | dwarf::DW_OP_plus}; | |||
1214 | bool WithStackValue = true; | |||
1215 | E = DIExpression::prependOpcodes(E, Ops, WithStackValue); | |||
1216 | DIGlobalVariableExpression *DGVE = | |||
1217 | DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E); | |||
1218 | NewGV->addDebugInfo(DGVE); | |||
1219 | } | |||
1220 | EmitOneOrZero = false; | |||
1221 | } | |||
1222 | } | |||
1223 | ||||
1224 | if (EmitOneOrZero) { | |||
1225 | // FIXME: This will only emit the variable's address for the debugger; | |||
1226 | // only 0 or 1 will ever be written to it. | |||
1227 | for (auto *GV : GVs) | |||
1228 | NewGV->addDebugInfo(GV); | |||
1229 | } | |||
1230 | ||||
1231 | while (!GV->use_empty()) { | |||
1232 | Instruction *UI = cast<Instruction>(GV->user_back()); | |||
1233 | if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { | |||
1234 | // Change the store into a boolean store. | |||
1235 | bool StoringOther = SI->getOperand(0) == OtherVal; | |||
1236 | // Only do this if we weren't storing a loaded value. | |||
1237 | Value *StoreVal; | |||
1238 | if (StoringOther || SI->getOperand(0) == InitVal) { | |||
1239 | StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), | |||
1240 | StoringOther); | |||
1241 | } else { | |||
1242 | // Otherwise, we are storing a previously loaded copy. To do this, | |||
1243 | // change the copy from copying the original value to just copying the | |||
1244 | // bool. | |||
1245 | Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); | |||
1246 | ||||
1247 | // If we've already replaced the input, StoredVal will be a cast or | |||
1248 | // select instruction. If not, it will be a load of the original | |||
1249 | // global. | |||
1250 | if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { | |||
1251 | assert(LI->getOperand(0) == GV && "Not a copy!"); | |||
1252 | // Insert a new load, to preserve the saved value. | |||
1253 | StoreVal = new LoadInst(NewGV->getValueType(), NewGV, | |||
1254 | LI->getName() + ".b", false, Align(1), | |||
1255 | LI->getOrdering(), LI->getSyncScopeID(), LI); | |||
1256 | } else { | |||
1257 | assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && | |||
1258 | "This is not a form that we understand!"); | |||
1259 | StoreVal = StoredVal->getOperand(0); | |||
1260 | assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); | |||
1261 | } | |||
1262 | } | |||
1263 | StoreInst *NSI = | |||
1264 | new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(), | |||
1265 | SI->getSyncScopeID(), SI); | |||
1266 | NSI->setDebugLoc(SI->getDebugLoc()); | |||
1267 | } else { | |||
1268 | // Change the load into a load of bool then a select. | |||
1269 | LoadInst *LI = cast<LoadInst>(UI); | |||
1270 | LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV, | |||
1271 | LI->getName() + ".b", false, Align(1), | |||
1272 | LI->getOrdering(), LI->getSyncScopeID(), LI); | |||
1273 | Instruction *NSI; | |||
1274 | if (IsOneZero) | |||
1275 | NSI = new ZExtInst(NLI, LI->getType(), "", LI); | |||
1276 | else | |||
1277 | NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); | |||
1278 | NSI->takeName(LI); | |||
1279 | // Since LI is split into two instructions, NLI and NSI both inherit the | |||
1280 | // same DebugLoc | |||
1281 | NLI->setDebugLoc(LI->getDebugLoc()); | |||
1282 | NSI->setDebugLoc(LI->getDebugLoc()); | |||
1283 | LI->replaceAllUsesWith(NSI); | |||
1284 | } | |||
1285 | UI->eraseFromParent(); | |||
1286 | } | |||
1287 | ||||
1288 | // Retain the name of the old global variable. People who are debugging their | |||
1289 | // programs may expect these variables to be named the same. | |||
1290 | NewGV->takeName(GV); | |||
1291 | GV->eraseFromParent(); | |||
1292 | return true; | |||
1293 | } | |||
1294 | ||||
1295 | static bool deleteIfDead( | |||
1296 | GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { | |||
1297 | GV.removeDeadConstantUsers(); | |||
1298 | ||||
1299 | if (!GV.isDiscardableIfUnused() && !GV.isDeclaration()) | |||
1300 | return false; | |||
1301 | ||||
1302 | if (const Comdat *C = GV.getComdat()) | |||
1303 | if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) | |||
1304 | return false; | |||
1305 | ||||
1306 | bool Dead; | |||
1307 | if (auto *F = dyn_cast<Function>(&GV)) | |||
1308 | Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead(); | |||
1309 | else | |||
1310 | Dead = GV.use_empty(); | |||
1311 | if (!Dead) | |||
1312 | return false; | |||
1313 | ||||
1314 | LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); | |||
1315 | GV.eraseFromParent(); | |||
1316 | ++NumDeleted; | |||
1317 | return true; | |||
1318 | } | |||
1319 | ||||
1320 | static bool isPointerValueDeadOnEntryToFunction( | |||
1321 | const Function *F, GlobalValue *GV, | |||
1322 | function_ref<DominatorTree &(Function &)> LookupDomTree) { | |||
1323 | // Find all uses of GV. We expect them all to be in F, and if we can't | |||
1324 | // identify any of the uses we bail out. | |||
1325 | // | |||
1326 | // On each of these uses, identify if the memory that GV points to is | |||
1327 | // used/required/live at the start of the function. If it is not, for example | |||
1328 | // if the first thing the function does is store to the GV, the GV can | |||
1329 | // possibly be demoted. | |||
1330 | // | |||
1331 | // We don't do an exhaustive search for memory operations - simply look | |||
1332 | // through bitcasts as they're quite common and benign. | |||
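// Editorial sketch of a qualifying pattern (names invented): in
//   define internal void @f() {
//     store i32 0, i32* @g        ; dominates every load of @g below
//     %v = load i32, i32* @g
//     ...
//   }
// the value @g holds on entry to @f is never observed, so @g is a candidate
// for demotion; a load of @g before any store would disqualify it.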
1333 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1334 | SmallVector<LoadInst *, 4> Loads; | |||
1335 | SmallVector<StoreInst *, 4> Stores; | |||
1336 | for (auto *U : GV->users()) { | |||
1337 | if (Operator::getOpcode(U) == Instruction::BitCast) { | |||
1338 | for (auto *UU : U->users()) { | |||
1339 | if (auto *LI = dyn_cast<LoadInst>(UU)) | |||
1340 | Loads.push_back(LI); | |||
1341 | else if (auto *SI = dyn_cast<StoreInst>(UU)) | |||
1342 | Stores.push_back(SI); | |||
1343 | else | |||
1344 | return false; | |||
1345 | } | |||
1346 | continue; | |||
1347 | } | |||
1348 | ||||
1349 | Instruction *I = dyn_cast<Instruction>(U); | |||
1350 | if (!I) | |||
1351 | return false; | |||
1352 | assert(I->getParent()->getParent() == F); | |||
1353 | ||||
1354 | if (auto *LI = dyn_cast<LoadInst>(I)) | |||
1355 | Loads.push_back(LI); | |||
1356 | else if (auto *SI = dyn_cast<StoreInst>(I)) | |||
1357 | Stores.push_back(SI); | |||
1358 | else | |||
1359 | return false; | |||
1360 | } | |||
1361 | ||||
1362 | // We have identified all uses of GV into loads and stores. Now check if all | |||
1363 | // of them are known not to depend on the value of the global at the function | |||
1364 | // entry point. We do this by ensuring that every load is dominated by at | |||
1365 | // least one store. | |||
1366 | auto &DT = LookupDomTree(*const_cast<Function *>(F)); | |||
1367 | ||||
1368 | // The below check is quadratic. Check we're not going to do too many tests. | |||
1369 | // FIXME: Even though this will always have worst-case quadratic time, we | |||
1370 | // could put effort into minimizing the average time by putting stores that | |||
1371 | // have been shown to dominate at least one load at the beginning of the | |||
1372 | // Stores array, making subsequent dominance checks more likely to succeed | |||
1373 | // early. | |||
1374 | // | |||
1375 | // The threshold here is fairly large because global->local demotion is a | |||
1376 | // very powerful optimization should it fire. | |||
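// Worked example (editorial addition): 10 loads and 10 stores require 100
// dominance tests, exactly at the threshold and still accepted; 11 of each
// (121 tests) would be rejected.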
1377 | const unsigned Threshold = 100; | |||
1378 | if (Loads.size() * Stores.size() > Threshold) | |||
1379 | return false; | |||
1380 | ||||
1381 | for (auto *L : Loads) { | |||
1382 | auto *LTy = L->getType(); | |||
1383 | if (none_of(Stores, [&](const StoreInst *S) { | |||
1384 | auto *STy = S->getValueOperand()->getType(); | |||
1385 | // The load is only dominated by the store if DomTree says so | |||
1386 | // and the number of bits loaded in L is less than or equal to | |||
1387 | // the number of bits stored in S. | |||
1388 | return DT.dominates(S, L) && | |||
1389 | DL.getTypeStoreSize(LTy).getFixedSize() <= | |||
1390 | DL.getTypeStoreSize(STy).getFixedSize(); | |||
1391 | })) | |||
1392 | return false; | |||
1393 | } | |||
1394 | // All loads have known dependences inside F, so the global can be localized. | |||
1395 | return true; | |||
1396 | } | |||
1397 | ||||
1398 | /// C may have non-instruction users. Can all of those users be turned into | |||
1399 | /// instructions? | |||
1400 | static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { | |||
1401 | // We don't do this exhaustively. The most common pattern that we really need | |||
1402 | // to care about is a constant GEP or constant bitcast - so we just look | |||
1403 | // through one single ConstantExpr. | |||
1404 | // | |||
1405 | // The set of constants for which this function returns true must be ones | |||
1406 | // that makeAllConstantUsesInstructions can handle. | |||
1407 | for (auto *U : C->users()) { | |||
1408 | if (isa<Instruction>(U)) | |||
1409 | continue; | |||
1410 | if (!isa<ConstantExpr>(U)) | |||
1411 | // Non-instruction, non-ConstantExpr user; cannot convert this. | |||
1412 | return false; | |||
1413 | for (auto *UU : U->users()) | |||
1414 | if (!isa<Instruction>(UU)) | |||
1415 | // A constantexpr used by another constant. We don't try and recurse any | |||
1416 | // further but just bail out at this point. | |||
1417 | return false; | |||
1418 | } | |||
1419 | ||||
1420 | return true; | |||
1421 | } | |||
1422 | ||||
1423 | /// C may have non-instruction users, and | |||
1424 | /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the | |||
1425 | /// non-instruction users to instructions. | |||
1426 | static void makeAllConstantUsesInstructions(Constant *C) { | |||
1427 | SmallVector<ConstantExpr*,4> Users; | |||
1428 | for (auto *U : C->users()) { | |||
1429 | if (isa<ConstantExpr>(U)) | |||
1430 | Users.push_back(cast<ConstantExpr>(U)); | |||
1431 | else | |||
1432 | // We should never get here; allNonInstructionUsersCanBeMadeInstructions | |||
1433 | // should not have returned true for C. | |||
1434 | assert( | |||
1435 | isa<Instruction>(U) && | |||
1436 | "Can't transform non-constantexpr non-instruction to instruction!"); | |||
1437 | } | |||
1438 | ||||
1439 | SmallVector<Value*,4> UUsers; | |||
1440 | for (auto *U : Users) { | |||
1441 | UUsers.clear(); | |||
1442 | append_range(UUsers, U->users()); | |||
1443 | for (auto *UU : UUsers) { | |||
1444 | Instruction *UI = cast<Instruction>(UU); | |||
1445 | Instruction *NewU = U->getAsInstruction(); | |||
1446 | NewU->insertBefore(UI); | |||
1447 | UI->replaceUsesOfWith(U, NewU); | |||
1448 | } | |||
1449 | // We've replaced all the uses, so destroy the constant. (destroyConstant | |||
1450 | // will update value handles and metadata.) | |||
1451 | U->destroyConstant(); | |||
1452 | } | |||
1453 | } | |||
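// Editorial before/after sketch of the rewrite above (names invented):
//   before:  %v = load i32, i32* getelementptr inbounds
//                     ([4 x i32], [4 x i32]* @g, i64 0, i64 1)
//   after:   %gep = getelementptr inbounds [4 x i32], [4 x i32]* @g,
//                     i64 0, i64 1
//            %v = load i32, i32* %gep
// Materializing each ConstantExpr as an ordinary instruction means @g can
// later be replaced wholesale (e.g. by an alloca) without rewriting
// constants.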
1454 | ||||
1455 | /// Analyze the specified global variable and optimize | |||
1456 | /// it if possible. If we make a change, return true. | |||
1457 | static bool | |||
1458 | processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, | |||
1459 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
1460 | function_ref<DominatorTree &(Function &)> LookupDomTree) { | |||
1461 | auto &DL = GV->getParent()->getDataLayout(); | |||
1462 | // If this is a first class global and has only one accessing function and | |||
1463 | // this function is non-recursive, we replace the global with a local alloca | |||
1464 | // in this function. | |||
1465 | // | |||
1466 | // NOTE: It doesn't make sense to promote non-single-value types since we | |||
1467 | // are just replacing static memory with stack memory. | |||
1468 | // | |||
1469 | // If the global is in a different address space, don't bring it onto the stack. | |||
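// Editorial sketch of the localization guarded below (names invented):
//   @g = internal global i32 7     ; accessed only from @only_user
// becomes, at the top of @only_user's entry block:
//   %g = alloca i32
//   store i32 7, i32* %g           ; materialize the initializer
// after which every former use of @g refers to the alloca and @g is erased.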
1470 | if (!GS.HasMultipleAccessingFunctions && | |||
1471 | GS.AccessingFunction && | |||
1472 | GV->getValueType()->isSingleValueType() && | |||
1473 | GV->getType()->getAddressSpace() == 0 && | |||
1474 | !GV->isExternallyInitialized() && | |||
1475 | allNonInstructionUsersCanBeMadeInstructions(GV) && | |||
1476 | GS.AccessingFunction->doesNotRecurse() && | |||
1477 | isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV, | |||
1478 | LookupDomTree)) { | |||
1479 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1480 | ||||
1481 | LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); | |||
1482 | Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction | |||
1483 | ->getEntryBlock().begin()); | |||
1484 | Type *ElemTy = GV->getValueType(); | |||
1485 | // FIXME: Pass Global's alignment when globals have alignment | |||
1486 | AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr, | |||
1487 | GV->getName(), &FirstI); | |||
1488 | if (!isa<UndefValue>(GV->getInitializer())) | |||
1489 | new StoreInst(GV->getInitializer(), Alloca, &FirstI); | |||
1490 | ||||
1491 | makeAllConstantUsesInstructions(GV); | |||
1492 | ||||
1493 | GV->replaceAllUsesWith(Alloca); | |||
1494 | GV->eraseFromParent(); | |||
1495 | ++NumLocalized; | |||
1496 | return true; | |||
1497 | } | |||
1498 | ||||
1499 | bool Changed = false; | |||
1500 | ||||
1501 | // If the global is never loaded (but may be stored to), it is dead. | |||
1502 | // Delete it now. | |||
1503 | if (!GS.IsLoaded) { | |||
1504 | LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); | |||
1505 | ||||
1506 | if (isLeakCheckerRoot(GV)) { | |||
1507 | // Delete any constant stores to the global. | |||
1508 | Changed = CleanupPointerRootUsers(GV, GetTLI); | |||
1509 | } else { | |||
1510 | // Delete any stores we can find to the global. We may not be able to | |||
1511 | // make it completely dead though. | |||
1512 | Changed = | |||
1513 | CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); | |||
1514 | } | |||
1515 | ||||
1516 | // If the global is dead now, delete it. | |||
1517 | if (GV->use_empty()) { | |||
1518 | GV->eraseFromParent(); | |||
1519 | ++NumDeleted; | |||
1520 | Changed = true; | |||
1521 | } | |||
1522 | return Changed; | |||
1523 | ||||
1524 | } | |||
1525 | if (GS.StoredType <= GlobalStatus::InitializerStored) { | |||
1526 | LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); | |||
1527 | ||||
1528 | // Don't actually mark a global constant if it's atomic because atomic loads | |||
1529 | // are implemented by a trivial cmpxchg in some edge-cases and that usually | |||
1530 | // requires write access to the variable even if it's not actually changed. | |||
1531 | if (GS.Ordering == AtomicOrdering::NotAtomic) { | |||
1532 | assert(!GV->isConstant() && "Expected a non-constant global"); | |||
1533 | GV->setConstant(true); | |||
1534 | Changed = true; | |||
1535 | } | |||
1536 | ||||
1537 | // Clean up any obviously simplifiable users now. | |||
1538 | Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); | |||
1539 | ||||
1540 | // If the global is dead now, just nuke it. | |||
1541 | if (GV->use_empty()) { | |||
1542 | LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify " | |||
1543 | << "all users and delete global!\n"); | |||
1544 | GV->eraseFromParent(); | |||
1545 | ++NumDeleted; | |||
1546 | return true; | |||
1547 | } | |||
1548 | ||||
1549 | // Fall through to the next check; see if we can optimize further. | |||
1550 | ++NumMarked; | |||
1551 | } | |||
1552 | if (!GV->getInitializer()->getType()->isSingleValueType()) { | |||
1553 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1554 | if (SRAGlobal(GV, DL)) | |||
1555 | return true; | |||
1556 | } | |||
1557 | if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { | |||
1558 | // If the initial value for the global was an undef value, and if only | |||
1559 | // one other value was stored into it, we can just change the | |||
1560 | // initializer to be the stored value, then delete all stores to the | |||
1561 | // global. This allows us to mark it constant. | |||
1562 | if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) | |||
1563 | if (isa<UndefValue>(GV->getInitializer())) { | |||
1564 | // Change the initial value here. | |||
1565 | GV->setInitializer(SOVConstant); | |||
1566 | ||||
1567 | // Clean up any obviously simplifiable users now. | |||
1568 | CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); | |||
1569 | ||||
1570 | if (GV->use_empty()) { | |||
1571 | LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to " | |||
1572 | << "simplify all users and delete global!\n"); | |||
1573 | GV->eraseFromParent(); | |||
1574 | ++NumDeleted; | |||
1575 | } | |||
1576 | ++NumSubstitute; | |||
1577 | return true; | |||
1578 | } | |||
1579 | ||||
1580 | // Try to optimize globals based on the knowledge that only one value | |||
1581 | // (besides its initializer) is ever stored to the global. | |||
1582 | if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, | |||
1583 | GetTLI)) | |||
1584 | return true; | |||
1585 | ||||
1586 | // Otherwise, if the global was not a boolean, we can shrink it to be a | |||
1587 | // boolean. | |||
1588 | if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) { | |||
1589 | if (GS.Ordering == AtomicOrdering::NotAtomic) { | |||
1590 | if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { | |||
1591 | ++NumShrunkToBool; | |||
1592 | return true; | |||
1593 | } | |||
1594 | } | |||
1595 | } | |||
1596 | } | |||
1597 | ||||
1598 | return Changed; | |||
1599 | } | |||
1600 | ||||
1601 | /// Analyze the specified global variable and optimize it if possible. If we | |||
1602 | /// make a change, return true. | |||
1603 | static bool | |||
1604 | processGlobal(GlobalValue &GV, | |||
1605 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
1606 | function_ref<DominatorTree &(Function &)> LookupDomTree) { | |||
1607 | if (GV.getName().startswith("llvm.")) | |||
1608 | return false; | |||
1609 | ||||
1610 | GlobalStatus GS; | |||
1611 | ||||
1612 | if (GlobalStatus::analyzeGlobal(&GV, GS)) | |||
1613 | return false; | |||
1614 | ||||
1615 | bool Changed = false; | |||
1616 | if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) { | |||
1617 | auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global | |||
1618 | : GlobalValue::UnnamedAddr::Local; | |||
1619 | if (NewUnnamedAddr != GV.getUnnamedAddr()) { | |||
1620 | GV.setUnnamedAddr(NewUnnamedAddr); | |||
1621 | NumUnnamed++; | |||
1622 | Changed = true; | |||
1623 | } | |||
1624 | } | |||
1625 | ||||
1626 | // Do more involved optimizations if the global is internal. | |||
1627 | if (!GV.hasLocalLinkage()) | |||
1628 | return Changed; | |||
1629 | ||||
1630 | auto *GVar = dyn_cast<GlobalVariable>(&GV); | |||
1631 | if (!GVar) | |||
1632 | return Changed; | |||
1633 | ||||
1634 | if (GVar->isConstant() || !GVar->hasInitializer()) | |||
1635 | return Changed; | |||
1636 | ||||
1637 | return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed; | |||
1638 | } | |||
1639 | ||||
1640 | /// Walk all of the direct calls of the specified function, changing them to | |||
1641 | /// FastCC. | |||
1642 | static void ChangeCalleesToFastCall(Function *F) { | |||
1643 | for (User *U : F->users()) { | |||
1644 | if (isa<BlockAddress>(U)) | |||
1645 | continue; | |||
1646 | cast<CallBase>(U)->setCallingConv(CallingConv::Fast); | |||
1647 | } | |||
1648 | } | |||
1649 | ||||
1650 | static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, | |||
1651 | Attribute::AttrKind A) { | |||
1652 | unsigned AttrIndex; | |||
1653 | if (Attrs.hasAttrSomewhere(A, &AttrIndex)) | |||
1654 | return Attrs.removeAttribute(C, AttrIndex, A); | |||
1655 | return Attrs; | |||
1656 | } | |||
1657 | ||||
1658 | static void RemoveAttribute(Function *F, Attribute::AttrKind A) { | |||
1659 | F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A)); | |||
1660 | for (User *U : F->users()) { | |||
1661 | if (isa<BlockAddress>(U)) | |||
1662 | continue; | |||
1663 | CallBase *CB = cast<CallBase>(U); | |||
1664 | CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A)); | |||
1665 | } | |||
1666 | } | |||
1667 | ||||
1668 | /// Return true if this is a calling convention that we'd like to change. The | |||
1669 | /// idea here is that we don't want to mess with the convention if the user | |||
1670 | /// explicitly requested something with performance implications like coldcc, | |||
1671 | /// GHC, or anyregcc. | |||
1672 | static bool hasChangeableCC(Function *F) { | |||
1673 | CallingConv::ID CC = F->getCallingConv(); | |||
1674 | ||||
1675 | // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? | |||
1676 | if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) | |||
1677 | return false; | |||
1678 | ||||
1679 | // FIXME: Change CC for the whole chain of musttail calls when possible. | |||
1680 | // | |||
1681 | // Can't change the CC of a function that either has musttail calls or is | |||
1682 | // itself a musttail callee. | |||
1683 | for (User *U : F->users()) { | |||
1684 | if (isa<BlockAddress>(U)) | |||
1685 | continue; | |||
1686 | CallInst* CI = dyn_cast<CallInst>(U); | |||
1687 | if (!CI) | |||
1688 | continue; | |||
1689 | ||||
1690 | if (CI->isMustTailCall()) | |||
1691 | return false; | |||
1692 | } | |||
1693 | ||||
1694 | for (BasicBlock &BB : *F) | |||
1695 | if (BB.getTerminatingMustTailCall()) | |||
1696 | return false; | |||
1697 | ||||
1698 | return true; | |||
1699 | } | |||
1700 | ||||
1701 | /// Return true if the block containing the call site has a BlockFrequency of | |||
1702 | /// less than ColdCCRelFreq% of the entry block. | |||
1703 | static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) { | |||
1704 | const BranchProbability ColdProb(ColdCCRelFreq, 100); | |||
1705 | auto *CallSiteBB = CB.getParent(); | |||
1706 | auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB); | |||
1707 | auto CallerEntryFreq = | |||
1708 | CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock())); | |||
1709 | return CallSiteFreq < CallerEntryFreq * ColdProb; | |||
1710 | } | |||
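// Worked example (editorial addition): assuming ColdCCRelFreq is 2, a call
// site whose block frequency is 1 in a caller with entry-block frequency
// 100 satisfies 1 < 100 * 2%, so the call site counts as cold.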
1711 | ||||
1712 | // This function checks if the input function F is cold at all call sites. It | |||
1713 | // also looks at each call site's containing function, returning false if the | |||
1714 | // caller contains other non-cold calls. The input vector AllCallsCold | |||
1715 | // contains a list of functions that only have call sites in cold blocks. | |||
1716 | static bool | |||
1717 | isValidCandidateForColdCC(Function &F, | |||
1718 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, | |||
1719 | const std::vector<Function *> &AllCallsCold) { | |||
1720 | ||||
1721 | if (F.user_empty()) | |||
1722 | return false; | |||
1723 | ||||
1724 | for (User *U : F.users()) { | |||
1725 | if (isa<BlockAddress>(U)) | |||
1726 | continue; | |||
1727 | ||||
1728 | CallBase &CB = cast<CallBase>(*U); | |||
1729 | Function *CallerFunc = CB.getParent()->getParent(); | |||
1730 | BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); | |||
1731 | if (!isColdCallSite(CB, CallerBFI)) | |||
1732 | return false; | |||
1733 | if (!llvm::is_contained(AllCallsCold, CallerFunc)) | |||
1734 | return false; | |||
1735 | } | |||
1736 | return true; | |||
1737 | } | |||
1738 | ||||
1739 | static void changeCallSitesToColdCC(Function *F) { | |||
1740 | for (User *U : F->users()) { | |||
1741 | if (isa<BlockAddress>(U)) | |||
1742 | continue; | |||
1743 | cast<CallBase>(U)->setCallingConv(CallingConv::Cold); | |||
1744 | } | |||
1745 | } | |||
1746 | ||||
1747 | // This function iterates over all the call instructions in the input Function | |||
1748 | // and checks that all call sites are in cold blocks and are allowed to use the | |||
1749 | // coldcc calling convention. | |||
1750 | static bool | |||
1751 | hasOnlyColdCalls(Function &F, | |||
1752 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI) { | |||
1753 | for (BasicBlock &BB : F) { | |||
1754 | for (Instruction &I : BB) { | |||
1755 | if (CallInst *CI = dyn_cast<CallInst>(&I)) { | |||
1756 | // Skip over inline asm instructions since they aren't function calls. | |||
1757 | if (CI->isInlineAsm()) | |||
1758 | continue; | |||
1759 | Function *CalledFn = CI->getCalledFunction(); | |||
1760 | if (!CalledFn) | |||
1761 | return false; | |||
1762 | if (!CalledFn->hasLocalLinkage()) | |||
1763 | return false; | |||
1764 | // Skip over intrinsics since they won't remain as function calls. | |||
1765 | if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic) | |||
1766 | continue; | |||
1767 | // Check if it's valid to use coldcc calling convention. | |||
1768 | if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() || | |||
1769 | CalledFn->hasAddressTaken()) | |||
1770 | return false; | |||
1771 | BlockFrequencyInfo &CallerBFI = GetBFI(F); | |||
1772 | if (!isColdCallSite(*CI, CallerBFI)) | |||
1773 | return false; | |||
1774 | } | |||
1775 | } | |||
1776 | } | |||
1777 | return true; | |||
1778 | } | |||
1779 | ||||
1780 | static bool hasMustTailCallers(Function *F) { | |||
1781 | for (User *U : F->users()) { | |||
1782 | CallBase *CB = dyn_cast<CallBase>(U); | |||
1783 | if (!CB) { | |||
1784 | assert(isa<BlockAddress>(U) && | |||
1785 | "Expected either CallBase or BlockAddress"); | |||
1786 | continue; | |||
1787 | } | |||
1788 | if (CB->isMustTailCall()) | |||
1789 | return true; | |||
1790 | } | |||
1791 | return false; | |||
1792 | } | |||
1793 | ||||
1794 | static bool hasInvokeCallers(Function *F) { | |||
1795 | for (User *U : F->users()) | |||
1796 | if (isa<InvokeInst>(U)) | |||
1797 | return true; | |||
1798 | return false; | |||
1799 | } | |||
1800 | ||||
1801 | static void RemovePreallocated(Function *F) { | |||
1802 | RemoveAttribute(F, Attribute::Preallocated); | |||
1803 | ||||
1804 | auto *M = F->getParent(); | |||
1805 | ||||
1806 | IRBuilder<> Builder(M->getContext()); | |||
1807 | ||||
1808 | // Cannot modify users() while iterating over it, so make a copy. | |||
1809 | SmallVector<User *, 4> PreallocatedCalls(F->users()); | |||
1810 | for (User *U : PreallocatedCalls) { | |||
1811 | CallBase *CB = dyn_cast<CallBase>(U); | |||
1812 | if (!CB
| |||
1813 | continue; | |||
1814 | ||||
1815 | assert( | |||
1816 | !CB->isMustTailCall() && | |||
1817 | "Shouldn't call RemovePreallocated() on a musttail preallocated call"); | |||
1818 | // Create copy of call without "preallocated" operand bundle. | |||
1819 | SmallVector<OperandBundleDef, 1> OpBundles; | |||
1820 | CB->getOperandBundlesAsDefs(OpBundles); | |||
1821 | CallBase *PreallocatedSetup = nullptr; | |||
1822 | for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) { | |||
1823 | if (It->getTag() == "preallocated") { | |||
1824 | PreallocatedSetup = cast<CallBase>(*It->input_begin()); | |||
1825 | OpBundles.erase(It); | |||
1826 | break; | |||
1827 | } | |||
1828 | } | |||
1829 | assert(PreallocatedSetup && "Did not find preallocated bundle"); | |||
1830 | uint64_t ArgCount = | |||
1831 | cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue(); | |||
 | ← Called C++ object pointer is null | |||
1832 | ||||
1833 | assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) && | |||
1834 | "Unknown indirect call type"); | |||
1835 | CallBase *NewCB = CallBase::Create(CB, OpBundles, CB); | |||
1836 | CB->replaceAllUsesWith(NewCB); | |||
1837 | NewCB->takeName(CB); | |||
1838 | CB->eraseFromParent(); | |||
1839 | ||||
1840 | Builder.SetInsertPoint(PreallocatedSetup); | |||
1841 | auto *StackSave = | |||
1842 | Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave)); | |||
1843 | ||||
1844 | Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction()); | |||
1845 | Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore), | |||
1846 | StackSave); | |||
1847 | ||||
1848 | // Replace @llvm.call.preallocated.arg() with alloca. | |||
1849 | // Cannot modify users() while iterating over it, so make a copy. | |||
1850 | // @llvm.call.preallocated.arg() can be called with the same index multiple | |||
1851 | // times. So for each @llvm.call.preallocated.arg(), we see if we have | |||
1852 | // already created a Value* for the index, and if not, create an alloca and | |||
1853 | // bitcast right after the @llvm.call.preallocated.setup() so that it | |||
1854 | // dominates all uses. | |||
1855 | SmallVector<Value *, 2> ArgAllocas(ArgCount); | |||
1856 | SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users()); | |||
1857 | for (auto *User : PreallocatedArgs) { | |||
1858 | auto *UseCall = cast<CallBase>(User); | |||
1859 | assert(UseCall->getCalledFunction()->getIntrinsicID() == | |||
1860 | Intrinsic::call_preallocated_arg && | |||
1861 | "preallocated token use was not a llvm.call.preallocated.arg"); | |||
1862 | uint64_t AllocArgIndex = | |||
1863 | cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue(); | |||
1864 | Value *AllocaReplacement = ArgAllocas[AllocArgIndex]; | |||
1865 | if (!AllocaReplacement) { | |||
1866 | auto AddressSpace = UseCall->getType()->getPointerAddressSpace(); | |||
1867 | auto *ArgType = UseCall | |||
1868 | ->getAttribute(AttributeList::FunctionIndex, | |||
1869 | Attribute::Preallocated) | |||
1870 | .getValueAsType(); | |||
1871 | auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction(); | |||
1872 | Builder.SetInsertPoint(InsertBefore); | |||
1873 | auto *Alloca = | |||
1874 | Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg"); | |||
1875 | auto *BitCast = Builder.CreateBitCast( | |||
1876 | Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName()); | |||
1877 | ArgAllocas[AllocArgIndex] = BitCast; | |||
1878 | AllocaReplacement = BitCast; | |||
1879 | } | |||
1880 | ||||
1881 | UseCall->replaceAllUsesWith(AllocaReplacement); | |||
1882 | UseCall->eraseFromParent(); | |||
1883 | } | |||
1884 | // Remove @llvm.call.preallocated.setup(). | |||
1885 | cast<Instruction>(PreallocatedSetup)->eraseFromParent(); | |||
1886 | } | |||
1887 | } | |||
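// Editorial before/after sketch of the lowering above (the argument type
// %arg.ty and all names are invented):
//   before:  %t = call token @llvm.call.preallocated.setup(i32 1)
//            %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
//            call void @f(i8* %a) [ "preallocated"(token %t) ]
//   after:   %ss = call i8* @llvm.stacksave()
//            %paarg = alloca %arg.ty
//            %a = bitcast %arg.ty* %paarg to i8*
//            call void @f(i8* %a)
//            call void @llvm.stackrestore(i8* %ss)
// Each preallocated argument is backed by a plain alloca, and the
// stacksave/stackrestore pair bounds its lifetime.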
1888 | ||||
1889 | static bool | |||
1890 | OptimizeFunctions(Module &M, | |||
1891 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
1892 | function_ref<TargetTransformInfo &(Function &)> GetTTI, | |||
1893 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, | |||
1894 | function_ref<DominatorTree &(Function &)> LookupDomTree, | |||
1895 | SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { | |||
1896 | ||||
1897 | bool Changed = false; | |||
1898 | ||||
1899 | std::vector<Function *> AllCallsCold; | |||
1900 | for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) { | |||
1901 | Function *F = &*FI++; | |||
1902 | if (hasOnlyColdCalls(*F, GetBFI)) | |||
1903 | AllCallsCold.push_back(F); | |||
1904 | } | |||
1905 | ||||
1906 | // Optimize functions. | |||
1907 | for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { | |||
1908 | Function *F = &*FI++; | |||
1909 | ||||
1910 | // Don't perform global opt pass on naked functions; we don't want fast | |||
1911 | // calling conventions for naked functions. | |||
1912 | if (F->hasFnAttribute(Attribute::Naked)) | |||
1913 | continue; | |||
1914 | ||||
1915 | // Functions without names cannot be referenced outside this module. | |||
1916 | if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) | |||
1917 | F->setLinkage(GlobalValue::InternalLinkage); | |||
1918 | ||||
1919 | if (deleteIfDead(*F, NotDiscardableComdats)) { | |||
1920 | Changed = true; | |||
1921 | continue; | |||
1922 | } | |||
1923 | ||||
1924 | // LLVM's definition of dominance allows instructions that are cyclic | |||
1925 | // in unreachable blocks, e.g.: | |||
1926 | // %pat = select i1 %condition, @global, i16* %pat | |||
1927 | // because any instruction dominates an instruction in a block that's | |||
1928 | // not reachable from entry. | |||
1929 | // So, remove unreachable blocks from the function, because a) there's | |||
1930 | // no point in analyzing them and b) GlobalOpt should otherwise grow | |||
1931 | // some more complicated logic to break these cycles. | |||
1932 | // Removing unreachable blocks might invalidate the dominator so we | |||
1933 | // recalculate it. | |||
1934 | if (!F->isDeclaration()) { | |||
1935 | if (removeUnreachableBlocks(*F)) { | |||
1936 | auto &DT = LookupDomTree(*F); | |||
1937 | DT.recalculate(*F); | |||
1938 | Changed = true; | |||
1939 | } | |||
1940 | } | |||
1941 | ||||
1942 | Changed |= processGlobal(*F, GetTLI, LookupDomTree); | |||
1943 | ||||
1944 | if (!F->hasLocalLinkage()) | |||
1945 | continue; | |||
1946 | ||||
1947 | // If we have an inalloca parameter that we can safely remove the | |||
1948 | // inalloca attribute from, do so. This unlocks optimizations that | |||
1949 | // wouldn't be safe in the presence of inalloca. | |||
1950 | // FIXME: We should also hoist alloca affected by this to the entry | |||
1951 | // block if possible. | |||
1952 | if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) && | |||
1953 | !F->hasAddressTaken() && !hasMustTailCallers(F)) { | |||
1954 | RemoveAttribute(F, Attribute::InAlloca); | |||
1955 | Changed = true; | |||
1956 | } | |||
1957 | ||||
1958 | // FIXME: handle invokes | |||
1959 | // FIXME: handle musttail | |||
1960 | if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { | |||
1961 | if (!F->hasAddressTaken() && !hasMustTailCallers(F) && | |||
1962 | !hasInvokeCallers(F)) { | |||
1963 | RemovePreallocated(F); | |||
1964 | Changed = true; | |||
1965 | } | |||
1966 | continue; | |||
1967 | } | |||
1968 | ||||
1969 | if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { | |||
1970 | NumInternalFunc++; | |||
1971 | TargetTransformInfo &TTI = GetTTI(*F); | |||
1972 | // Change the calling convention to coldcc if either stress testing is | |||
1973 | // enabled or the target would like to use coldcc on functions which are | |||
1974 | // cold at all call sites and the callers contain no other non-coldcc | |||
1975 | // calls. | |||
1976 | if (EnableColdCCStressTest || | |||
1977 | (TTI.useColdCCForColdCall(*F) && | |||
1978 | isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) { | |||
1979 | F->setCallingConv(CallingConv::Cold); | |||
1980 | changeCallSitesToColdCC(F); | |||
1981 | Changed = true; | |||
1982 | NumColdCC++; | |||
1983 | } | |||
1984 | } | |||
1985 | ||||
1986 | if (hasChangeableCC(F) && !F->isVarArg() && | |||
1987 | !F->hasAddressTaken()) { | |||
1988 | // If this function has a calling convention worth changing, is not a | |||
1989 | // varargs function, and is only called directly, promote it to use the | |||
1990 | // Fast calling convention. | |||
1991 | F->setCallingConv(CallingConv::Fast); | |||
1992 | ChangeCalleesToFastCall(F); | |||
1993 | ++NumFastCallFns; | |||
1994 | Changed = true; | |||
1995 | } | |||
1996 | ||||
1997 | if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && | |||
1998 | !F->hasAddressTaken()) { | |||
1999 | // The function is not used by a trampoline intrinsic, so it is safe | |||
2000 | // to remove the 'nest' attribute. | |||
2001 | RemoveAttribute(F, Attribute::Nest); | |||
2002 | ++NumNestRemoved; | |||
2003 | Changed = true; | |||
2004 | } | |||
2005 | } | |||
2006 | return Changed; | |||
2007 | } | |||
2008 | ||||
2009 | static bool | |||
2010 | OptimizeGlobalVars(Module &M, | |||
2011 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
2012 | function_ref<DominatorTree &(Function &)> LookupDomTree, | |||
2013 | SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { | |||
2014 | bool Changed = false; | |||
2015 | ||||
2016 | for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); | |||
2017 | GVI != E; ) { | |||
2018 | GlobalVariable *GV = &*GVI++; | |||
2019 | // Global variables without names cannot be referenced outside this module. | |||
2020 | if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) | |||
2021 | GV->setLinkage(GlobalValue::InternalLinkage); | |||
2022 | // Simplify the initializer. | |||
2023 | if (GV->hasInitializer()) | |||
2024 | if (auto *C = dyn_cast<Constant>(GV->getInitializer())) { | |||
2025 | auto &DL = M.getDataLayout(); | |||
2026 | // TLI is not used in the case of a Constant, so use default nullptr | |||
2027 | // for that optional parameter, since we don't have a Function to | |||
2028 | // provide GetTLI anyway. | |||
2029 | Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); | |||
2030 | if (New != C) | |||
2031 | GV->setInitializer(New); | |||
2032 | } | |||
2033 | ||||
2034 | if (deleteIfDead(*GV, NotDiscardableComdats)) { | |||
2035 | Changed = true; | |||
2036 | continue; | |||
2037 | } | |||
2038 | ||||
2039 | Changed |= processGlobal(*GV, GetTLI, LookupDomTree); | |||
2040 | } | |||
2041 | return Changed; | |||
2042 | } | |||
2043 | ||||
2044 | /// Evaluate a piece of a constantexpr store into a global initializer. This | |||
2045 | /// returns 'Init' modified to reflect 'Val' stored into it. At this point, the | |||
2046 | /// GEP operands of Addr [0, OpNo) have been stepped into. | |||
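// Editorial example (names invented): committing a value through
//   getelementptr ([2 x %pair], [2 x %pair]* @g, i32 0, i64 1, i32 0)
// starts the recursion at OpNo == 2, selects element 1 of the array, then
// field 0 of that %pair, and rebuilds each enclosing constant aggregate on
// the way back out of the recursion.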
2047 | static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, | |||
2048 | ConstantExpr *Addr, unsigned OpNo) { | |||
2049 | // Base case of the recursion. | |||
2050 | if (OpNo == Addr->getNumOperands()) { | |||
2051 | assert(Val->getType() == Init->getType() && "Type mismatch!"); | |||
2052 | return Val; | |||
2053 | } | |||
2054 | ||||
2055 | SmallVector<Constant*, 32> Elts; | |||
2056 | if (StructType *STy = dyn_cast<StructType>(Init->getType())) { | |||
2057 | // Break up the constant into its elements. | |||
2058 | for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) | |||
2059 | Elts.push_back(Init->getAggregateElement(i)); | |||
2060 | ||||
2061 | // Replace the element that we are supposed to. | |||
2062 | ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); | |||
2063 | unsigned Idx = CU->getZExtValue(); | |||
2064 | assert(Idx < STy->getNumElements() && "Struct index out of range!"); | |||
2065 | Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); | |||
2066 | ||||
2067 | // Return the modified struct. | |||
2068 | return ConstantStruct::get(STy, Elts); | |||
2069 | } | |||
2070 | ||||
2071 | ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); | |||
2072 | uint64_t NumElts; | |||
2073 | if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) | |||
2074 | NumElts = ATy->getNumElements(); | |||
2075 | else | |||
2076 | NumElts = cast<FixedVectorType>(Init->getType())->getNumElements(); | |||
2077 | ||||
2078 | // Break up the array into elements. | |||
2079 | for (uint64_t i = 0, e = NumElts; i != e; ++i) | |||
2080 | Elts.push_back(Init->getAggregateElement(i)); | |||
2081 | ||||
2082 | assert(CI->getZExtValue() < NumElts); | |||
2083 | Elts[CI->getZExtValue()] = | |||
2084 | EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); | |||
2085 | ||||
2086 | if (Init->getType()->isArrayTy()) | |||
2087 | return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts); | |||
2088 | return ConstantVector::get(Elts); | |||
2089 | } | |||
2090 | ||||
2091 | /// We have decided that Addr (which satisfies the predicate | |||
2092 | /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. | |||
2093 | static void CommitValueTo(Constant *Val, Constant *Addr) { | |||
2094 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { | |||
2095 | assert(GV->hasInitializer()); | |||
2096 | GV->setInitializer(Val); | |||
2097 | return; | |||
2098 | } | |||
2099 | ||||
2100 | ConstantExpr *CE = cast<ConstantExpr>(Addr); | |||
2101 | GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); | |||
2102 | GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); | |||
2103 | } | |||
2104 | ||||
2105 | /// Given a map of address -> value, where addresses are expected to be some form | |||
2106 | /// of either a global or a constant GEP, set the initializer for the address to | |||
2107 | /// be the value. This performs mostly the same function as CommitValueTo() | |||
2108 | /// and EvaluateStoreInto() but is optimized to be more efficient for the common | |||
2109 | /// case where the set of addresses are GEPs sharing the same underlying global, | |||
2110 | /// processing the GEPs in batches rather than individually. | |||
2111 | /// | |||
2112 | /// To give an example, consider the following C++ code adapted from the clang | |||
2113 | /// regression tests: | |||
2114 | /// struct S { | |||
2115 | /// int n = 10; | |||
2116 | /// int m = 2 * n; | |||
2117 | /// S(int a) : n(a) {} | |||
2118 | /// }; | |||
2119 | /// | |||
2120 | /// template<typename T> | |||
2121 | /// struct U { | |||
2122 | /// T *r = &q; | |||
2123 | /// T q = 42; | |||
2124 | /// U *p = this; | |||
2125 | /// }; | |||
2126 | /// | |||
2127 | /// U<S> e; | |||
2128 | /// | |||
2129 | /// The global static constructor for 'e' will need to initialize 'r' and 'p' of | |||
2130 | /// the outer struct, while also initializing the inner 'q' struct's 'n' and 'm' | |||
2131 | /// members. This batch algorithm will simply use the general CommitValueTo() | |||
2132 | /// method to handle the complex nested S struct initialization of 'q', before | |||
2133 | /// processing the outermost members in a single batch. Using CommitValueTo() to | |||
2134 | /// handle members in the outer struct is inefficient when the struct/array is | |||
2135 | /// very large, as we end up creating and destroying constant arrays for each | |||
2136 | /// initialization. | |||
2137 | /// For the above case, we expect the following IR to be generated: | |||
2138 | /// | |||
2139 | /// %struct.U = type { %struct.S*, %struct.S, %struct.U* } | |||
2140 | /// %struct.S = type { i32, i32 } | |||
2141 | /// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e, | |||
2142 | /// i64 0, i32 1), | |||
2143 | /// %struct.S { i32 42, i32 84 }, %struct.U* @e } | |||
2144 | /// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex | |||
2145 | /// constant expression, while the other two elements of @e are "simple". | |||
2146 | static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) { | |||
2147 | SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs; | |||
2148 | SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs; | |||
2149 | SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs; | |||
2150 | SimpleCEs.reserve(Mem.size()); | |||
2151 | ||||
2152 | for (const auto &I : Mem) { | |||
2153 | if (auto *GV = dyn_cast<GlobalVariable>(I.first)) { | |||
2154 | GVs.push_back(std::make_pair(GV, I.second)); | |||
2155 | } else { | |||
2156 | ConstantExpr *GEP = cast<ConstantExpr>(I.first); | |||
2157 | // We don't handle the deeply recursive case using the batch method. | |||
2158 | if (GEP->getNumOperands() > 3) | |||
2159 | ComplexCEs.push_back(std::make_pair(GEP, I.second)); | |||
2160 | else | |||
2161 | SimpleCEs.push_back(std::make_pair(GEP, I.second)); | |||
2162 | } | |||
2163 | } | |||
2164 | ||||
2165 | // The algorithm below doesn't handle cases like nested structs, so use the | |||
2166 | // slower fully general method if we have to. | |||
2167 | for (auto ComplexCE : ComplexCEs) | |||
2168 | CommitValueTo(ComplexCE.second, ComplexCE.first); | |||
2169 | ||||
2170 | for (auto GVPair : GVs) { | |||
2171 | assert(GVPair.first->hasInitializer()); | |||
2172 | GVPair.first->setInitializer(GVPair.second); | |||
2173 | } | |||
2174 | ||||
2175 | if (SimpleCEs.empty()) | |||
2176 | return; | |||
2177 | ||||
2178 | // We cache a single global's initializer elements in the case where the | |||
2179 | // subsequent address/val pair uses the same one. This avoids throwing away and | |||
2180 | // rebuilding the constant struct/vector/array just because one element is | |||
2181 | // modified at a time. | |||
2182 | SmallVector<Constant *, 32> Elts; | |||
2183 | Elts.reserve(SimpleCEs.size()); | |||
2184 | GlobalVariable *CurrentGV = nullptr; | |||
2185 | ||||
2186 | auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) { | |||
2187 | Constant *Init = GV->getInitializer(); | |||
2188 | Type *Ty = Init->getType(); | |||
2189 | if (Update) { | |||
2190 | if (CurrentGV) { | |||
2191 | assert(CurrentGV && "Expected a GV to commit to!"); | |||
2192 | Type *CurrentInitTy = CurrentGV->getInitializer()->getType(); | |||
2193 | // We have a valid cache that needs to be committed. | |||
2194 | if (StructType *STy = dyn_cast<StructType>(CurrentInitTy)) | |||
2195 | CurrentGV->setInitializer(ConstantStruct::get(STy, Elts)); | |||
2196 | else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy)) | |||
2197 | CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts)); | |||
2198 | else | |||
2199 | CurrentGV->setInitializer(ConstantVector::get(Elts)); | |||
2200 | } | |||
2201 | if (CurrentGV == GV) | |||
2202 | return; | |||
2203 | // Need to clear and set up cache for new initializer. | |||
2204 | CurrentGV = GV; | |||
2205 | Elts.clear(); | |||
2206 | unsigned NumElts; | |||
2207 | if (auto *STy = dyn_cast<StructType>(Ty)) | |||
2208 | NumElts = STy->getNumElements(); | |||
2209 | else if (auto *ATy = dyn_cast<ArrayType>(Ty)) | |||
2210 | NumElts = ATy->getNumElements(); | |||
2211 | else | |||
2212 | NumElts = cast<FixedVectorType>(Ty)->getNumElements(); | |||
2213 | for (unsigned i = 0, e = NumElts; i != e; ++i) | |||
2214 | Elts.push_back(Init->getAggregateElement(i)); | |||
2215 | } | |||
2216 | }; | |||
2217 | ||||
2218 | for (auto CEPair : SimpleCEs) { | |||
2219 | ConstantExpr *GEP = CEPair.first; | |||
2220 | Constant *Val = CEPair.second; | |||
2221 | ||||
2222 | GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0)); | |||
2223 | commitAndSetupCache(GV, GV != CurrentGV); | |||
2224 | ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2)); | |||
2225 | Elts[CI->getZExtValue()] = Val; | |||
2226 | } | |||
2227 | // The last initializer in the list needs to be committed, others | |||
2228 | // will be committed on a new initializer being processed. | |||
2229 | commitAndSetupCache(CurrentGV, true); | |||
2230 | } | |||
2231 | ||||
2232 | /// Evaluate static constructors in the function, if we can. Return true if we | |||
2233 | /// can, false otherwise. | |||
2234 | static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, | |||
2235 | TargetLibraryInfo *TLI) { | |||
2236 | // Call the function. | |||
2237 | Evaluator Eval(DL, TLI); | |||
2238 | Constant *RetValDummy; | |||
2239 | bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, | |||
2240 | SmallVector<Constant*, 0>()); | |||
2241 | ||||
2242 | if (EvalSuccess) { | |||
2243 | ++NumCtorsEvaluated; | |||
2244 | ||||
2245 | // We succeeded at evaluation: commit the result. | |||
2246 | LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" | |||
2247 | << F->getName() << "' to " | |||
2248 | << Eval.getMutatedMemory().size() << " stores.\n"); | |||
2249 | BatchCommitValueTo(Eval.getMutatedMemory()); | |||
2250 | for (GlobalVariable *GV : Eval.getInvariants()) | |||
2251 | GV->setConstant(true); | |||
2252 | } | |||
2253 | ||||
2254 | return EvalSuccess; | |||
2255 | } | |||
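// Editorial sketch: for C++ such as
//   struct S { int x; S() { x = 42; } };
//   static S s;              // constructor registered via @llvm.global_ctors
// a successful evaluation records one mutated-memory entry mapping the
// field of @s to i32 42; committing it folds the store into @s's
// initializer, so the constructor call can later be dropped.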
2256 | ||||
2257 | static int compareNames(Constant *const *A, Constant *const *B) { | |||
2258 | Value *AStripped = (*A)->stripPointerCasts(); | |||
2259 | Value *BStripped = (*B)->stripPointerCasts(); | |||
2260 | return AStripped->getName().compare(BStripped->getName()); | |||
2261 | } | |||
2262 | ||||
2263 | static void setUsedInitializer(GlobalVariable &V, | |||
2264 | const SmallPtrSetImpl<GlobalValue *> &Init) { | |||
2265 | if (Init.empty()) { | |||
2266 | V.eraseFromParent(); | |||
2267 | return; | |||
2268 | } | |||
2269 | ||||
2270 | // Type of pointer to the array of pointers. | |||
2271 | PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); | |||
2272 | ||||
2273 | SmallVector<Constant *, 8> UsedArray; | |||
2274 | for (GlobalValue *GV : Init) { | |||
2275 | Constant *Cast | |||
2276 | = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); | |||
2277 | UsedArray.push_back(Cast); | |||
2278 | } | |||
2279 | // Sort to get deterministic order. | |||
2280 | array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); | |||
2281 | ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size()); | |||
2282 | ||||
2283 | Module *M = V.getParent(); | |||
2284 | V.removeFromParent(); | |||
2285 | GlobalVariable *NV = | |||
2286 | new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, | |||
2287 | ConstantArray::get(ATy, UsedArray), ""); | |||
2288 | NV->takeName(&V); | |||
2289 | NV->setSection("llvm.metadata"); | |||
2290 | delete &V; | |||
2291 | } | |||
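// Editorial sketch of the array this function rebuilds (entries invented):
//   @llvm.used = appending global [2 x i8*]
//       [i8* bitcast (void ()* @f to i8*), i8* @g], section "llvm.metadata"
// Every entry is cast to i8* and the array is sorted by name so the
// emitted initializer is deterministic regardless of set iteration order.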
2292 | ||||
2293 | namespace { | |||
2294 | ||||
2295 | /// An easy to access representation of llvm.used and llvm.compiler.used. | |||
2296 | class LLVMUsed { | |||
2297 | SmallPtrSet<GlobalValue *, 4> Used; | |||
2298 | SmallPtrSet<GlobalValue *, 4> CompilerUsed; | |||
2299 | GlobalVariable *UsedV; | |||
2300 | GlobalVariable *CompilerUsedV; | |||
2301 | ||||
2302 | public: | |||
2303 | LLVMUsed(Module &M) { | |||
2304 | SmallVector<GlobalValue *, 4> Vec; | |||
2305 | UsedV = collectUsedGlobalVariables(M, Vec, false); | |||
2306 | Used = {Vec.begin(), Vec.end()}; | |||
2307 | Vec.clear(); | |||
2308 | CompilerUsedV = collectUsedGlobalVariables(M, Vec, true); | |||
2309 | CompilerUsed = {Vec.begin(), Vec.end()}; | |||
2310 | } | |||
2311 | ||||
2312 | using iterator = SmallPtrSet<GlobalValue *, 4>::iterator; | |||
2313 | using used_iterator_range = iterator_range<iterator>; | |||
2314 | ||||
2315 | iterator usedBegin() { return Used.begin(); } | |||
2316 | iterator usedEnd() { return Used.end(); } | |||
2317 | ||||
2318 | used_iterator_range used() { | |||
2319 | return used_iterator_range(usedBegin(), usedEnd()); | |||
2320 | } | |||
2321 | ||||
2322 | iterator compilerUsedBegin() { return CompilerUsed.begin(); } | |||
2323 | iterator compilerUsedEnd() { return CompilerUsed.end(); } | |||
2324 | ||||
2325 | used_iterator_range compilerUsed() { | |||
2326 | return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); | |||
2327 | } | |||
2328 | ||||
2329 | bool usedCount(GlobalValue *GV) const { return Used.count(GV); } | |||
2330 | ||||
2331 | bool compilerUsedCount(GlobalValue *GV) const { | |||
2332 | return CompilerUsed.count(GV); | |||
2333 | } | |||
2334 | ||||
2335 | bool usedErase(GlobalValue *GV) { return Used.erase(GV); } | |||
2336 | bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } | |||
2337 | bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } | |||
2338 | ||||
2339 | bool compilerUsedInsert(GlobalValue *GV) { | |||
2340 | return CompilerUsed.insert(GV).second; | |||
2341 | } | |||
2342 | ||||
2343 | void syncVariablesAndSets() { | |||
2344 | if (UsedV) | |||
2345 | setUsedInitializer(*UsedV, Used); | |||
2346 | if (CompilerUsedV) | |||
2347 | setUsedInitializer(*CompilerUsedV, CompilerUsed); | |||
2348 | } | |||
2349 | }; | |||
2350 | ||||
2351 | } // end anonymous namespace | |||
2352 | ||||
2353 | static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { | |||
2354 | if (GA.use_empty()) // No use at all. | |||
2355 | return false; | |||
2356 | ||||
2357 | assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) && | |||
2358 | "We should have removed the duplicated " | |||
2359 | "element from llvm.compiler.used"); | |||
2360 | if (!GA.hasOneUse()) | |||
2361 | // Strictly more than one use. So at least one use is in neither llvm.used | |||
2362 | // nor llvm.compiler.used. | |||
2363 | return true; | |||
2364 | ||||
2365 | // Exactly one use. Check if it is in llvm.used or llvm.compiler.used. | |||
2366 | return !U.usedCount(&GA) && !U.compilerUsedCount(&GA); | |||
2367 | } | |||
2368 | ||||
2369 | static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, | |||
2370 | const LLVMUsed &U) { | |||
2371 | unsigned N = 2; | |||
2372 | assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) && | |||
2373 | "We should have removed the duplicated " | |||
2374 | "element from llvm.compiler.used"); | |||
2375 | if (U.usedCount(&V) || U.compilerUsedCount(&V)) | |||
2376 | ++N; | |||
2377 | return V.hasNUsesOrMore(N); | |||
2378 | } | |||

static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
  if (!GA.hasLocalLinkage())
    return true;

  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}

static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
                             bool &RenameTarget) {
  RenameTarget = false;
  bool Ret = false;
  if (hasUseOtherThanLLVMUsed(GA, U))
    Ret = true;

  // If the alias is externally visible, we may still be able to simplify it.
  if (!mayHaveOtherReferences(GA, U))
    return Ret;

  // If the aliasee has internal linkage, give it the name and linkage
  // of the alias, and delete the alias. This turns:
  //   define internal ... @f(...)
  //   @a = alias ... @f
  // into:
  //   define ... @a(...)
  Constant *Aliasee = GA.getAliasee();
  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
  if (!Target->hasLocalLinkage())
    return Ret;

  // Do not perform the transform if multiple aliases potentially target the
  // aliasee. This check also ensures that it is safe to replace the section
  // and other attributes of the aliasee with those of the alias.
  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
    return Ret;

  RenameTarget = true;
  return true;
}

static bool
OptimizeGlobalAliases(Module &M,
                      SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
  bool Changed = false;
  LLVMUsed Used(M);

  for (GlobalValue *GV : Used.used())
    Used.compilerUsedErase(GV);

  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E;) {
    GlobalAlias *J = &*I++;

    // Aliases without names cannot be referenced outside this module.
    if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
      J->setLinkage(GlobalValue::InternalLinkage);

    if (deleteIfDead(*J, NotDiscardableComdats)) {
      Changed = true;
      continue;
    }

    // If the alias can change at link time, nothing can be done - bail out.
    if (J->isInterposable())
      continue;

    Constant *Aliasee = J->getAliasee();
    GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
    // We can't trivially replace the alias with the aliasee if the aliasee is
    // non-trivial in some way. We also can't replace the alias with the
    // aliasee if the aliasee is interposable because aliases point to the
    // local definition.
    // TODO: Try to handle non-zero GEPs of local aliasees.
    if (!Target || Target->isInterposable())
      continue;
    Target->removeDeadConstantUsers();

    // Make all users of the alias use the aliasee instead.
    bool RenameTarget;
    if (!hasUsesToReplace(*J, Used, RenameTarget))
      continue;

    J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
    ++NumAliasesResolved;
    Changed = true;

    if (RenameTarget) {
      // Give the aliasee the name, linkage and other attributes of the alias.
      Target->takeName(&*J);
      Target->setLinkage(J->getLinkage());
      Target->setDSOLocal(J->isDSOLocal());
      Target->setVisibility(J->getVisibility());
      Target->setDLLStorageClass(J->getDLLStorageClass());

      if (Used.usedErase(&*J))
        Used.usedInsert(Target);

      if (Used.compilerUsedErase(&*J))
        Used.compilerUsedInsert(Target);
    } else if (mayHaveOtherReferences(*J, Used))
      continue;

    // Delete the alias.
    M.getAliasList().erase(J);
    ++NumAliasesRemoved;
    Changed = true;
  }

  Used.syncVariablesAndSets();

  return Changed;
}
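// Illustrative sketch of the alias resolution above (hand-written IR, not
// produced by this file): given
//   define internal void @f() { ret void }
//   @a = alias void (), void ()* @f
// every user of @a is redirected to the aliasee, and when RenameTarget fires
// the aliasee additionally takes over the alias's name, linkage, visibility
// and DLL storage class, leaving the module as
//   define void @a() { ret void }
// with the alias itself erased from the module's alias list.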

static Function *
FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // Hack to get a default TLI before we have an actual Function.
  auto FuncIter = M.begin();
  if (FuncIter == M.end())
    return nullptr;
  auto *TLI = &GetTLI(*FuncIter);

  LibFunc F = LibFunc_cxa_atexit;
  if (!TLI->has(F))
    return nullptr;

  Function *Fn = M.getFunction(TLI->getName(F));
  if (!Fn)
    return nullptr;

  // Now get the actual TLI for Fn.
  TLI = &GetTLI(*Fn);

  // Make sure that the function has the correct prototype.
  if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
    return nullptr;

  return Fn;
}

/// Returns whether the given function is an empty C++ destructor and can
/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we simply check for 'ret'.
static bool cxxDtorIsEmpty(const Function &Fn) {
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone
  // and nounwind, but that doesn't seem worth doing.
  if (Fn.isDeclaration())
    return false;

  for (auto &I : Fn.getEntryBlock()) {
    if (isa<DbgInfoIntrinsic>(I))
      continue;
    if (isa<ReturnInst>(I))
      return true;
    break;
  }
  return false;
}
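// Illustrative note (editorial, not upstream source): an entry block such as
//   call void @llvm.dbg.value(...)
//   ret void
// counts as empty, because debug intrinsics are skipped; any other
// instruction encountered before the 'ret' breaks out of the loop and makes
// cxxDtorIsEmpty return false.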

static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
  /// Itanium C++ ABI p3.3.5:
  ///
  ///   After constructing a global (or local static) object that will require
  ///   destruction on exit, a termination function is registered as follows:
  ///
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
  ///
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
  ///   call f(p) when DSO d is unloaded, before all such termination calls
  ///   registered before this one. It returns zero if registration is
  ///   successful, nonzero on failure.

  // This pass will look for calls to __cxa_atexit where the registered
  // function is trivial and remove them.
  bool Changed = false;

  for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
       I != E;) {
    // We're only interested in calls. Theoretically, we could handle invoke
    // instructions as well, but neither llvm-gcc nor clang generate invokes
    // to __cxa_atexit.
    CallInst *CI = dyn_cast<CallInst>(*I++);
    if (!CI)
      continue;

    Function *DtorFn =
        dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
    if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
      continue;

    // Just remove the call.
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    CI->eraseFromParent();

    ++NumCXXDtorsRemoved;

    Changed = true;
  }

  return Changed;
}
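// Illustrative sketch (hypothetical C++ input, editorial, not upstream
// source): for a translation unit like
//   struct S { ~S() {} };
//   S s;
// the front end typically registers the destructor roughly as
//   __cxa_atexit((void (*)(void *))&dtor_of_S, &s, &__dso_handle);
// Because ~S() lowers to a function whose entry block is just 'ret void',
// cxxDtorIsEmpty holds and the registration call is deleted above.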

static bool optimizeGlobalsInModule(
    Module &M, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<TargetTransformInfo &(Function &)> GetTTI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
  bool Changed = false;
  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;

    NotDiscardableComdats.clear();
    for (const GlobalVariable &GV : M.globals())
      if (const Comdat *C = GV.getComdat())
        if (!GV.isDiscardableIfUnused() || !GV.use_empty())
          NotDiscardableComdats.insert(C);
    for (Function &F : M)
      if (const Comdat *C = F.getComdat())
        if (!F.isDefTriviallyDead())
          NotDiscardableComdats.insert(C);
    for (GlobalAlias &GA : M.aliases())
      if (const Comdat *C = GA.getComdat())
        if (!GA.isDiscardableIfUnused() || !GA.use_empty())
          NotDiscardableComdats.insert(C);

    // Delete functions that are trivially dead, ccc -> fastcc.
    LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
                                     NotDiscardableComdats);

    // Optimize global_ctors list.
    LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
      return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
    });

    // Optimize non-address-taken globals.
    LocalChange |=
        OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);

    // Try to remove trivial global destructors if they are not removed
    // already.
    Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
    if (CXAAtExitFn)
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);

    Changed |= LocalChange;
  }

  // TODO: Move all global ctors functions to the end of the module for code
  // layout.

  return Changed;
}
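// Design note (editorial): the while (LocalChange) loop above iterates the
// individual optimizations to a fixed point, since one transform can enable
// another; for example, resolving an alias can leave its target with no
// remaining users, which OptimizeFunctions only deletes on the next round.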

PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &DL = M.getDataLayout();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
    return FAM.getResult<DominatorTreeAnalysis>(F);
  };
  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };
  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(F);
  };

  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(F);
  };

  if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
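// Illustrative usage (editorial, assuming a stock 'opt' build): this new-PM
// entry point is what runs when the pass is requested by name, e.g.
//   opt -passes=globalopt input.ll -S -o output.ll
// PreservedAnalyses::none() is returned whenever anything changed, because
// the pass can delete globals, functions and aliases module-wide.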

namespace {

struct GlobalOptLegacyPass : public ModulePass {
  static char ID; // Pass identification, replacement for typeid

  GlobalOptLegacyPass() : ModulePass(ID) {
    initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    auto &DL = M.getDataLayout();
    auto LookupDomTree = [this](Function &F) -> DominatorTree & {
      return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
    };
    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    };

    auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
      return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
    };

    return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
                                   LookupDomTree);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
  }
};

} // end anonymous namespace

char GlobalOptLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
                      "Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
                    "Global Variable Optimizer", false, false)

ModulePass *llvm::createGlobalOptimizerPass() {
  return new GlobalOptLegacyPass();
}
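// Illustrative usage (editorial, assuming a build where the legacy pass
// manager is still enabled): createGlobalOptimizerPass() is the hook used by
//   opt -globalopt input.ll -S
// and by PassManagerBuilder-based optimization pipelines.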