File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/GlobalOpt.cpp
Warning: line 2187, column 22: Called C++ object pointer is null
1 | //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This pass transforms simple global variables that never have their address | |||
10 | // taken. If obviously true, it marks read/write globals as constant, deletes | |||
11 | // variables only stored to, etc. | |||
12 | // | |||
13 | //===----------------------------------------------------------------------===// | |||
14 | ||||
15 | #include "llvm/Transforms/IPO/GlobalOpt.h" | |||
16 | #include "llvm/ADT/DenseMap.h" | |||
17 | #include "llvm/ADT/STLExtras.h" | |||
18 | #include "llvm/ADT/SmallPtrSet.h" | |||
19 | #include "llvm/ADT/SmallVector.h" | |||
20 | #include "llvm/ADT/Statistic.h" | |||
21 | #include "llvm/ADT/Twine.h" | |||
22 | #include "llvm/ADT/iterator_range.h" | |||
23 | #include "llvm/Analysis/BlockFrequencyInfo.h" | |||
24 | #include "llvm/Analysis/ConstantFolding.h" | |||
25 | #include "llvm/Analysis/MemoryBuiltins.h" | |||
26 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
27 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
28 | #include "llvm/BinaryFormat/Dwarf.h" | |||
29 | #include "llvm/IR/Attributes.h" | |||
30 | #include "llvm/IR/BasicBlock.h" | |||
31 | #include "llvm/IR/CallingConv.h" | |||
32 | #include "llvm/IR/Constant.h" | |||
33 | #include "llvm/IR/Constants.h" | |||
34 | #include "llvm/IR/DataLayout.h" | |||
35 | #include "llvm/IR/DebugInfoMetadata.h" | |||
36 | #include "llvm/IR/DerivedTypes.h" | |||
37 | #include "llvm/IR/Dominators.h" | |||
38 | #include "llvm/IR/Function.h" | |||
39 | #include "llvm/IR/GetElementPtrTypeIterator.h" | |||
40 | #include "llvm/IR/GlobalAlias.h" | |||
41 | #include "llvm/IR/GlobalValue.h" | |||
42 | #include "llvm/IR/GlobalVariable.h" | |||
43 | #include "llvm/IR/IRBuilder.h" | |||
44 | #include "llvm/IR/InstrTypes.h" | |||
45 | #include "llvm/IR/Instruction.h" | |||
46 | #include "llvm/IR/Instructions.h" | |||
47 | #include "llvm/IR/IntrinsicInst.h" | |||
48 | #include "llvm/IR/Module.h" | |||
49 | #include "llvm/IR/Operator.h" | |||
50 | #include "llvm/IR/Type.h" | |||
51 | #include "llvm/IR/Use.h" | |||
52 | #include "llvm/IR/User.h" | |||
53 | #include "llvm/IR/Value.h" | |||
54 | #include "llvm/IR/ValueHandle.h" | |||
55 | #include "llvm/InitializePasses.h" | |||
56 | #include "llvm/Pass.h" | |||
57 | #include "llvm/Support/AtomicOrdering.h" | |||
58 | #include "llvm/Support/Casting.h" | |||
59 | #include "llvm/Support/CommandLine.h" | |||
60 | #include "llvm/Support/Debug.h" | |||
61 | #include "llvm/Support/ErrorHandling.h" | |||
62 | #include "llvm/Support/MathExtras.h" | |||
63 | #include "llvm/Support/raw_ostream.h" | |||
64 | #include "llvm/Transforms/IPO.h" | |||
65 | #include "llvm/Transforms/Utils/CtorUtils.h" | |||
66 | #include "llvm/Transforms/Utils/Evaluator.h" | |||
67 | #include "llvm/Transforms/Utils/GlobalStatus.h" | |||
68 | #include "llvm/Transforms/Utils/Local.h" | |||
69 | #include <cassert> | |||
70 | #include <cstdint> | |||
71 | #include <utility> | |||
72 | #include <vector> | |||
73 | ||||
74 | using namespace llvm; | |||
75 | ||||
76 | #define DEBUG_TYPE "globalopt" | |||
77 | ||||
78 | STATISTIC(NumMarked , "Number of globals marked constant"); | |||
79 | STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); | |||
80 | STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); | |||
81 | STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); | |||
82 | STATISTIC(NumDeleted , "Number of globals deleted"); | |||
83 | STATISTIC(NumGlobUses , "Number of global uses devirtualized"); | |||
84 | STATISTIC(NumLocalized , "Number of globals localized"); | |||
85 | STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); | |||
86 | STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); | |||
87 | STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); | |||
88 | STATISTIC(NumNestRemoved , "Number of nest attributes removed"); | |||
89 | STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); | |||
90 | STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); | |||
91 | STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); | |||
92 | STATISTIC(NumInternalFunc, "Number of internal functions"); | |||
93 | STATISTIC(NumColdCC, "Number of functions marked coldcc"); | |||
94 | ||||
95 | static cl::opt<bool> | |||
96 | EnableColdCCStressTest("enable-coldcc-stress-test", | |||
97 | cl::desc("Enable stress test of coldcc by adding " | |||
98 | "calling conv to all internal functions."), | |||
99 | cl::init(false), cl::Hidden); | |||
100 | ||||
101 | static cl::opt<int> ColdCCRelFreq( | |||
102 | "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, | |||
103 | cl::desc( | |||
104 | "Maximum block frequency, expressed as a percentage of caller's " | |||
105 | "entry frequency, for a call site to be considered cold for enabling" | |||
106 | "coldcc")); | |||
107 | ||||
108 | /// Is this global variable possibly used by a leak checker as a root? If so, | |||
109 | /// we might not really want to eliminate the stores to it. | |||
110 | static bool isLeakCheckerRoot(GlobalVariable *GV) { | |||
111 | // A global variable is a root if it is a pointer, or could plausibly contain | |||
112 | // a pointer. There are two challenges; one is that we could have a struct | |||
113 | // that has an inner member which is a pointer. We recurse through the type to | |||
114 | // detect these (up to a point). The other is that we may actually be a union | |||
115 | // of a pointer and another type, and so our LLVM type is an integer which | |||
116 | // gets converted into a pointer, or our type is an [i8 x #] with a pointer | |||
117 | // potentially contained here. | |||
118 | ||||
119 | if (GV->hasPrivateLinkage()) | |||
120 | return false; | |||
121 | ||||
122 | SmallVector<Type *, 4> Types; | |||
123 | Types.push_back(GV->getValueType()); | |||
124 | ||||
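| // If the type walk below exceeds this limit, conservatively treat GV as a root. | |||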
125 | unsigned Limit = 20; | |||
126 | do { | |||
127 | Type *Ty = Types.pop_back_val(); | |||
128 | switch (Ty->getTypeID()) { | |||
129 | default: break; | |||
130 | case Type::PointerTyID: | |||
131 | return true; | |||
132 | case Type::FixedVectorTyID: | |||
133 | case Type::ScalableVectorTyID: | |||
134 | if (cast<VectorType>(Ty)->getElementType()->isPointerTy()) | |||
135 | return true; | |||
136 | break; | |||
137 | case Type::ArrayTyID: | |||
138 | Types.push_back(cast<ArrayType>(Ty)->getElementType()); | |||
139 | break; | |||
140 | case Type::StructTyID: { | |||
141 | StructType *STy = cast<StructType>(Ty); | |||
142 | if (STy->isOpaque()) return true; | |||
143 | for (StructType::element_iterator I = STy->element_begin(), | |||
144 | E = STy->element_end(); I != E; ++I) { | |||
145 | Type *InnerTy = *I; | |||
146 | if (isa<PointerType>(InnerTy)) return true; | |||
147 | if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) || | |||
148 | isa<VectorType>(InnerTy)) | |||
149 | Types.push_back(InnerTy); | |||
150 | } | |||
151 | break; | |||
152 | } | |||
153 | } | |||
154 | if (--Limit == 0) return true; | |||
155 | } while (!Types.empty()); | |||
156 | return false; | |||
157 | } | |||
158 | ||||
159 | /// Given a value that is stored to a global but never read, determine whether | |||
160 | /// it's safe to remove the store and the chain of computation that feeds the | |||
161 | /// store. | |||
162 | static bool IsSafeComputationToRemove( | |||
163 | Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
164 | do { | |||
165 | if (isa<Constant>(V)) | |||
166 | return true; | |||
167 | if (!V->hasOneUse()) | |||
168 | return false; | |||
169 | if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || | |||
170 | isa<GlobalValue>(V)) | |||
171 | return false; | |||
172 | if (isAllocationFn(V, GetTLI)) | |||
173 | return true; | |||
174 | ||||
175 | Instruction *I = cast<Instruction>(V); | |||
176 | if (I->mayHaveSideEffects()) | |||
177 | return false; | |||
178 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { | |||
179 | if (!GEP->hasAllConstantIndices()) | |||
180 | return false; | |||
181 | } else if (I->getNumOperands() != 1) { | |||
182 | return false; | |||
183 | } | |||
184 | ||||
185 | V = I->getOperand(0); | |||
186 | } while (true); | |||
187 | } | |||
188 | ||||
189 | /// This GV is a pointer root. Loop over all users of the global and clean up | |||
190 | /// any that obviously don't assign the global a value that isn't dynamically | |||
191 | /// allocated. | |||
192 | static bool | |||
193 | CleanupPointerRootUsers(GlobalVariable *GV, | |||
194 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
195 | // A brief explanation of leak checkers. The goal is to find bugs where | |||
196 | // pointers are forgotten, causing an accumulating growth in memory | |||
197 | // usage over time. The common strategy for leak checkers is to explicitly | |||
198 | // allow the memory pointed to by globals at exit. This is popular because it | |||
199 | // also solves another problem where the main thread of a C++ program may shut | |||
200 | // down before other threads that are still expecting to use those globals. To | |||
201 | // handle that case, we expect the program may create a singleton and never | |||
202 | // destroy it. | |||
203 | ||||
204 | bool Changed = false; | |||
205 | ||||
206 | // If Dead[n].first is the only use of a malloc result, we can delete its | |||
207 | // chain of computation and the store to the global in Dead[n].second. | |||
208 | SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; | |||
209 | ||||
210 | // Constants can't be pointers to dynamically allocated memory. | |||
211 | for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); | |||
212 | UI != E;) { | |||
213 | User *U = *UI++; | |||
214 | if (StoreInst *SI = dyn_cast<StoreInst>(U)) { | |||
215 | Value *V = SI->getValueOperand(); | |||
216 | if (isa<Constant>(V)) { | |||
217 | Changed = true; | |||
218 | SI->eraseFromParent(); | |||
219 | } else if (Instruction *I = dyn_cast<Instruction>(V)) { | |||
220 | if (I->hasOneUse()) | |||
221 | Dead.push_back(std::make_pair(I, SI)); | |||
222 | } | |||
223 | } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { | |||
224 | if (isa<Constant>(MSI->getValue())) { | |||
225 | Changed = true; | |||
226 | MSI->eraseFromParent(); | |||
227 | } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { | |||
228 | if (I->hasOneUse()) | |||
229 | Dead.push_back(std::make_pair(I, MSI)); | |||
230 | } | |||
231 | } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { | |||
232 | GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); | |||
233 | if (MemSrc && MemSrc->isConstant()) { | |||
234 | Changed = true; | |||
235 | MTI->eraseFromParent(); | |||
236 | } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) { | |||
237 | if (I->hasOneUse()) | |||
238 | Dead.push_back(std::make_pair(I, MTI)); | |||
239 | } | |||
240 | } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { | |||
241 | if (CE->use_empty()) { | |||
242 | CE->destroyConstant(); | |||
243 | Changed = true; | |||
244 | } | |||
245 | } else if (Constant *C = dyn_cast<Constant>(U)) { | |||
246 | if (isSafeToDestroyConstant(C)) { | |||
247 | C->destroyConstant(); | |||
248 | // This could have invalidated UI, start over from scratch. | |||
249 | Dead.clear(); | |||
250 | CleanupPointerRootUsers(GV, GetTLI); | |||
251 | return true; | |||
252 | } | |||
253 | } | |||
254 | } | |||
255 | ||||
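| // Remove each dead store together with the single-use computation chain that | |||
| // feeds it, including any allocation call at the root of that chain. | |||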
256 | for (int i = 0, e = Dead.size(); i != e; ++i) { | |||
257 | if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { | |||
258 | Dead[i].second->eraseFromParent(); | |||
259 | Instruction *I = Dead[i].first; | |||
260 | do { | |||
261 | if (isAllocationFn(I, GetTLI)) | |||
262 | break; | |||
263 | Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); | |||
264 | if (!J) | |||
265 | break; | |||
266 | I->eraseFromParent(); | |||
267 | I = J; | |||
268 | } while (true); | |||
269 | I->eraseFromParent(); | |||
270 | Changed = true; | |||
271 | } | |||
272 | } | |||
273 | ||||
274 | return Changed; | |||
275 | } | |||
276 | ||||
277 | /// We just marked GV constant. Loop over all users of the global, cleaning up | |||
278 | /// the obvious ones. This is largely just a quick scan over the use list to | |||
279 | /// clean up the easy and obvious cruft. This returns true if it made a change. | |||
280 | static bool CleanupConstantGlobalUsers( | |||
281 | Value *V, Constant *Init, const DataLayout &DL, | |||
282 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
283 | bool Changed = false; | |||
284 | // Note that we need to use a weak value handle for the worklist items. When | |||
285 | // we delete a constant array, we may also be holding pointer to one of its | |||
286 | // elements (or an element of one of its elements if we're dealing with an | |||
287 | // array of arrays) in the worklist. | |||
288 | SmallVector<WeakTrackingVH, 8> WorkList(V->users()); | |||
289 | while (!WorkList.empty()) { | |||
290 | Value *UV = WorkList.pop_back_val(); | |||
291 | if (!UV) | |||
292 | continue; | |||
293 | ||||
294 | User *U = cast<User>(UV); | |||
295 | ||||
296 | if (LoadInst *LI = dyn_cast<LoadInst>(U)) { | |||
297 | if (Init) { | |||
298 | if (auto *Casted = | |||
299 | ConstantFoldLoadThroughBitcast(Init, LI->getType(), DL)) { | |||
300 | // Replace the load with the initializer. | |||
301 | LI->replaceAllUsesWith(Casted); | |||
302 | LI->eraseFromParent(); | |||
303 | Changed = true; | |||
304 | } | |||
305 | } | |||
306 | } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { | |||
307 | // Store must be unreachable or storing Init into the global. | |||
308 | SI->eraseFromParent(); | |||
309 | Changed = true; | |||
310 | } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { | |||
311 | if (CE->getOpcode() == Instruction::GetElementPtr) { | |||
312 | Constant *SubInit = nullptr; | |||
313 | if (Init) | |||
314 | SubInit = ConstantFoldLoadThroughGEPConstantExpr( | |||
315 | Init, CE, V->getType()->getPointerElementType(), DL); | |||
316 | Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI); | |||
317 | } else if ((CE->getOpcode() == Instruction::BitCast && | |||
318 | CE->getType()->isPointerTy()) || | |||
319 | CE->getOpcode() == Instruction::AddrSpaceCast) { | |||
320 | // Pointer cast, delete any stores and memsets to the global. | |||
321 | Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI); | |||
322 | } | |||
323 | ||||
324 | if (CE->use_empty()) { | |||
325 | CE->destroyConstant(); | |||
326 | Changed = true; | |||
327 | } | |||
328 | } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { | |||
329 | // Do not transform "gepinst (gep constexpr (GV))" here, because forming | |||
330 | // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold | |||
331 | // and will invalidate our notion of what Init is. | |||
332 | Constant *SubInit = nullptr; | |||
333 | if (!isa<ConstantExpr>(GEP->getOperand(0))) { | |||
334 | ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( | |||
335 | ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction()))); | |||
336 | if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) | |||
337 | SubInit = ConstantFoldLoadThroughGEPConstantExpr( | |||
338 | Init, CE, V->getType()->getPointerElementType(), DL); | |||
339 | ||||
340 | // If the initializer is an all-null value and we have an inbounds GEP, | |||
341 | // we already know what the result of any load from that GEP is. | |||
342 | // TODO: Handle splats. | |||
343 | if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) | |||
344 | SubInit = Constant::getNullValue(GEP->getResultElementType()); | |||
345 | } | |||
346 | Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI); | |||
347 | ||||
348 | if (GEP->use_empty()) { | |||
349 | GEP->eraseFromParent(); | |||
350 | Changed = true; | |||
351 | } | |||
352 | } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv | |||
353 | if (MI->getRawDest() == V) { | |||
354 | MI->eraseFromParent(); | |||
355 | Changed = true; | |||
356 | } | |||
357 | ||||
358 | } else if (Constant *C = dyn_cast<Constant>(U)) { | |||
359 | // If we have a chain of dead constantexprs or other things dangling from | |||
360 | // us, and if they are all dead, nuke them without remorse. | |||
361 | if (isSafeToDestroyConstant(C)) { | |||
362 | C->destroyConstant(); | |||
363 | CleanupConstantGlobalUsers(V, Init, DL, GetTLI); | |||
364 | return true; | |||
365 | } | |||
366 | } | |||
367 | } | |||
368 | return Changed; | |||
369 | } | |||
370 | ||||
371 | static bool isSafeSROAElementUse(Value *V); | |||
372 | ||||
373 | /// Return true if the specified GEP is a safe user of a derived | |||
374 | /// expression from a global that we want to SROA. | |||
375 | static bool isSafeSROAGEP(User *U) { | |||
376 | // Check to see if this ConstantExpr GEP is SRA'able. In particular, we | |||
377 | // don't like < 3 operand CE's, and we don't like non-constant integer | |||
378 | // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some | |||
379 | // value of C. | |||
380 | if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || | |||
381 | !cast<Constant>(U->getOperand(1))->isNullValue()) | |||
382 | return false; | |||
383 | ||||
384 | gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); | |||
385 | ++GEPI; // Skip over the pointer index. | |||
386 | ||||
387 | // For all other levels we require that the indices are constant and inrange. | |||
388 | // In particular, consider: A[0][i]. We cannot know that the user isn't doing | |||
389 | // invalid things like allowing i to index an out-of-range subscript that | |||
390 | // accesses A[1]. This can also happen between different members of a struct | |||
391 | // in llvm IR. | |||
392 | for (; GEPI != E; ++GEPI) { | |||
393 | if (GEPI.isStruct()) | |||
394 | continue; | |||
395 | ||||
396 | ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); | |||
397 | if (!IdxVal || (GEPI.isBoundedSequential() && | |||
398 | IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) | |||
399 | return false; | |||
400 | } | |||
401 | ||||
402 | return llvm::all_of(U->users(), | |||
403 | [](User *UU) { return isSafeSROAElementUse(UU); }); | |||
404 | } | |||
405 | ||||
406 | /// Return true if the specified instruction is a safe user of a derived | |||
407 | /// expression from a global that we want to SROA. | |||
408 | static bool isSafeSROAElementUse(Value *V) { | |||
409 | // We might have a dead and dangling constant hanging off of here. | |||
410 | if (Constant *C = dyn_cast<Constant>(V)) | |||
411 | return isSafeToDestroyConstant(C); | |||
412 | ||||
413 | Instruction *I = dyn_cast<Instruction>(V); | |||
414 | if (!I) return false; | |||
415 | ||||
416 | // Loads are ok. | |||
417 | if (isa<LoadInst>(I)) return true; | |||
418 | ||||
419 | // Stores *to* the pointer are ok. | |||
420 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) | |||
421 | return SI->getOperand(0) != V; | |||
422 | ||||
423 | // Otherwise, it must be a GEP. Check it and its users are safe to SRA. | |||
424 | return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I); | |||
425 | } | |||
426 | ||||
427 | /// Look at all uses of the global and decide whether it is safe for us to | |||
428 | /// perform this transformation. | |||
429 | static bool GlobalUsersSafeToSRA(GlobalValue *GV) { | |||
430 | for (User *U : GV->users()) { | |||
431 | // The user of the global must be a GEP Inst or a ConstantExpr GEP. | |||
432 | if (!isa<GetElementPtrInst>(U) && | |||
433 | (!isa<ConstantExpr>(U) || | |||
434 | cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) | |||
435 | return false; | |||
436 | ||||
437 | // Check the gep and its users are safe to SRA | |||
438 | if (!isSafeSROAGEP(U)) | |||
439 | return false; | |||
440 | } | |||
441 | ||||
442 | return true; | |||
443 | } | |||
444 | ||||
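| // Helpers for treating arrays and fixed vectors uniformly during global SRA. | |||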
445 | static bool IsSRASequential(Type *T) { | |||
446 | return isa<ArrayType>(T) || isa<VectorType>(T); | |||
447 | } | |||
448 | static uint64_t GetSRASequentialNumElements(Type *T) { | |||
449 | if (ArrayType *AT = dyn_cast<ArrayType>(T)) | |||
450 | return AT->getNumElements(); | |||
451 | return cast<FixedVectorType>(T)->getNumElements(); | |||
452 | } | |||
453 | static Type *GetSRASequentialElementType(Type *T) { | |||
454 | if (ArrayType *AT = dyn_cast<ArrayType>(T)) | |||
455 | return AT->getElementType(); | |||
456 | return cast<VectorType>(T)->getElementType(); | |||
457 | } | |||
458 | static bool CanDoGlobalSRA(GlobalVariable *GV) { | |||
459 | Constant *Init = GV->getInitializer(); | |||
460 | ||||
461 | if (isa<StructType>(Init->getType())) { | |||
462 | // nothing to check | |||
463 | } else if (IsSRASequential(Init->getType())) { | |||
464 | if (GetSRASequentialNumElements(Init->getType()) > 16 && | |||
465 | GV->hasNUsesOrMore(16)) | |||
466 | return false; // It's not worth it. | |||
467 | } else | |||
468 | return false; | |||
469 | ||||
470 | return GlobalUsersSafeToSRA(GV); | |||
471 | } | |||
472 | ||||
473 | /// Copy over the debug info for a variable to its SRA replacements. | |||
474 | static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, | |||
475 | uint64_t FragmentOffsetInBits, | |||
476 | uint64_t FragmentSizeInBits, | |||
477 | uint64_t VarSize) { | |||
478 | SmallVector<DIGlobalVariableExpression *, 1> GVs; | |||
479 | GV->getDebugInfo(GVs); | |||
480 | for (auto *GVE : GVs) { | |||
481 | DIVariable *Var = GVE->getVariable(); | |||
482 | DIExpression *Expr = GVE->getExpression(); | |||
483 | // If the FragmentSize is smaller than the variable, | |||
484 | // emit a fragment expression. | |||
485 | if (FragmentSizeInBits < VarSize) { | |||
486 | if (auto E = DIExpression::createFragmentExpression( | |||
487 | Expr, FragmentOffsetInBits, FragmentSizeInBits)) | |||
488 | Expr = *E; | |||
489 | else | |||
490 | return; | |||
491 | } | |||
492 | auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); | |||
493 | NGV->addDebugInfo(NGVE); | |||
494 | } | |||
495 | } | |||
496 | ||||
497 | /// Perform scalar replacement of aggregates on the specified global variable. | |||
498 | /// This opens the door for other optimizations by exposing the behavior of the | |||
499 | /// program in a more fine-grained way. We have determined that this | |||
500 | /// transformation is safe already. We return the first global variable we | |||
501 | /// insert so that the caller can reprocess it. | |||
502 | static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { | |||
503 | // Make sure this global only has simple uses that we can SRA. | |||
504 | if (!CanDoGlobalSRA(GV)) | |||
505 | return nullptr; | |||
506 | ||||
507 | assert(GV->hasLocalLinkage()); | |||
508 | Constant *Init = GV->getInitializer(); | |||
509 | Type *Ty = Init->getType(); | |||
510 | uint64_t VarSize = DL.getTypeSizeInBits(Ty); | |||
511 | ||||
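| // Maps each used aggregate element index to the scalar global created for it. | |||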
512 | std::map<unsigned, GlobalVariable *> NewGlobals; | |||
513 | ||||
514 | // Get the alignment of the global, either explicit or target-specific. | |||
515 | Align StartAlignment = | |||
516 | DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType()); | |||
517 | ||||
518 | // Loop over all users and create replacement variables for used aggregate | |||
519 | // elements. | |||
520 | for (User *GEP : GV->users()) { | |||
521 | assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() == | |||
522 | Instruction::GetElementPtr) || | |||
523 | isa<GetElementPtrInst>(GEP)) && | |||
524 | "NonGEP CE's are not SRAable!"); | |||
525 | ||||
526 | // Ignore the 1th operand, which has to be zero or else the program is quite | |||
527 | // broken (undefined). Get the 2nd operand, which is the structure or array | |||
528 | // index. | |||
529 | unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); | |||
530 | if (NewGlobals.count(ElementIdx) == 1) | |||
531 | continue; // we've already created a replacement variable | |||
532 | assert(NewGlobals.count(ElementIdx) == 0); | |||
533 | ||||
534 | Type *ElTy = nullptr; | |||
535 | if (StructType *STy = dyn_cast<StructType>(Ty)) | |||
536 | ElTy = STy->getElementType(ElementIdx); | |||
537 | else | |||
538 | ElTy = GetSRASequentialElementType(Ty); | |||
539 | assert(ElTy); | |||
540 | ||||
541 | Constant *In = Init->getAggregateElement(ElementIdx); | |||
542 | assert(In && "Couldn't get element of initializer?"); | |||
543 | ||||
544 | GlobalVariable *NGV = new GlobalVariable( | |||
545 | ElTy, false, GlobalVariable::InternalLinkage, In, | |||
546 | GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(), | |||
547 | GV->getType()->getAddressSpace()); | |||
548 | NGV->setExternallyInitialized(GV->isExternallyInitialized()); | |||
549 | NGV->copyAttributesFrom(GV); | |||
550 | NewGlobals.insert(std::make_pair(ElementIdx, NGV)); | |||
551 | ||||
552 | if (StructType *STy = dyn_cast<StructType>(Ty)) { | |||
553 | const StructLayout &Layout = *DL.getStructLayout(STy); | |||
554 | ||||
555 | // Calculate the known alignment of the field. If the original aggregate | |||
556 | // had 256 byte alignment for example, something might depend on that: | |||
557 | // propagate info to each field. | |||
558 | uint64_t FieldOffset = Layout.getElementOffset(ElementIdx); | |||
559 | Align NewAlign = commonAlignment(StartAlignment, FieldOffset); | |||
560 | if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx))) | |||
561 | NGV->setAlignment(NewAlign); | |||
562 | ||||
563 | // Copy over the debug info for the variable. | |||
564 | uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); | |||
565 | uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx); | |||
566 | transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize); | |||
567 | } else { | |||
568 | uint64_t EltSize = DL.getTypeAllocSize(ElTy); | |||
569 | Align EltAlign = DL.getABITypeAlign(ElTy); | |||
570 | uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); | |||
571 | ||||
572 | // Calculate the known alignment of the field. If the original aggregate | |||
573 | // had 256 byte alignment for example, something might depend on that: | |||
574 | // propagate info to each field. | |||
575 | Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx); | |||
576 | if (NewAlign > EltAlign) | |||
577 | NGV->setAlignment(NewAlign); | |||
578 | transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx, | |||
579 | FragmentSizeInBits, VarSize); | |||
580 | } | |||
581 | } | |||
582 | ||||
583 | if (NewGlobals.empty()) | |||
584 | return nullptr; | |||
585 | ||||
586 | Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); | |||
587 | for (auto NewGlobalVar : NewGlobals) | |||
588 | Globals.push_back(NewGlobalVar.second); | |||
589 | ||||
590 | LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); | |||
591 | ||||
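| // Constant zero, used as the leading index when forming the shortened GEPs below. | |||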
592 | Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext())); | |||
593 | ||||
594 | // Loop over all of the uses of the global, replacing the constantexpr geps, | |||
595 | // with smaller constantexpr geps or direct references. | |||
596 | while (!GV->use_empty()) { | |||
597 | User *GEP = GV->user_back(); | |||
598 | assert(((isa<ConstantExpr>(GEP) && | |||
599 | cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| | |||
600 | isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); | |||
601 | ||||
602 | // Ignore the 1th operand, which has to be zero or else the program is quite | |||
603 | // broken (undefined). Get the 2nd operand, which is the structure or array | |||
604 | // index. | |||
605 | unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); | |||
606 | assert(NewGlobals.count(ElementIdx) == 1); | |||
607 | ||||
608 | Value *NewPtr = NewGlobals[ElementIdx]; | |||
609 | Type *NewTy = NewGlobals[ElementIdx]->getValueType(); | |||
610 | ||||
611 | // Form a shorter GEP if needed. | |||
612 | if (GEP->getNumOperands() > 3) { | |||
613 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { | |||
614 | SmallVector<Constant*, 8> Idxs; | |||
615 | Idxs.push_back(NullInt); | |||
616 | for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) | |||
617 | Idxs.push_back(CE->getOperand(i)); | |||
618 | NewPtr = | |||
619 | ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); | |||
620 | } else { | |||
621 | GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); | |||
622 | SmallVector<Value*, 8> Idxs; | |||
623 | Idxs.push_back(NullInt); | |||
624 | for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) | |||
625 | Idxs.push_back(GEPI->getOperand(i)); | |||
626 | NewPtr = GetElementPtrInst::Create( | |||
627 | NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx), | |||
628 | GEPI); | |||
629 | } | |||
630 | } | |||
631 | GEP->replaceAllUsesWith(NewPtr); | |||
632 | ||||
633 | // We changed the pointer of any memory access user. Recalculate alignments. | |||
634 | for (User *U : NewPtr->users()) { | |||
635 | if (auto *Load = dyn_cast<LoadInst>(U)) { | |||
636 | Align PrefAlign = DL.getPrefTypeAlign(Load->getType()); | |||
637 | Align NewAlign = getOrEnforceKnownAlignment(Load->getPointerOperand(), | |||
638 | PrefAlign, DL, Load); | |||
639 | Load->setAlignment(NewAlign); | |||
640 | } | |||
641 | if (auto *Store = dyn_cast<StoreInst>(U)) { | |||
642 | Align PrefAlign = | |||
643 | DL.getPrefTypeAlign(Store->getValueOperand()->getType()); | |||
644 | Align NewAlign = getOrEnforceKnownAlignment(Store->getPointerOperand(), | |||
645 | PrefAlign, DL, Store); | |||
646 | Store->setAlignment(NewAlign); | |||
647 | } | |||
648 | } | |||
649 | ||||
650 | if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) | |||
651 | GEPI->eraseFromParent(); | |||
652 | else | |||
653 | cast<ConstantExpr>(GEP)->destroyConstant(); | |||
654 | } | |||
655 | ||||
656 | // Delete the old global, now that it is dead. | |||
657 | Globals.erase(GV); | |||
658 | ++NumSRA; | |||
659 | ||||
660 | assert(NewGlobals.size() > 0); | |||
661 | return NewGlobals.begin()->second; | |||
662 | } | |||
663 | ||||
664 | /// Return true if all users of the specified value will trap if the value is | |||
665 | /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid | |||
666 | /// reprocessing them. | |||
667 | static bool AllUsesOfValueWillTrapIfNull(const Value *V, | |||
668 | SmallPtrSetImpl<const PHINode*> &PHIs) { | |||
669 | for (const User *U : V->users()) { | |||
670 | if (const Instruction *I = dyn_cast<Instruction>(U)) { | |||
671 | // If null pointer is considered valid, then all uses are non-trapping. | |||
672 | // Non address-space 0 globals have already been pruned by the caller. | |||
673 | if (NullPointerIsDefined(I->getFunction())) | |||
674 | return false; | |||
675 | } | |||
676 | if (isa<LoadInst>(U)) { | |||
677 | // Will trap. | |||
678 | } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { | |||
679 | if (SI->getOperand(0) == V) { | |||
680 | //cerr << "NONTRAPPING USE: " << *U; | |||
681 | return false; // Storing the value. | |||
682 | } | |||
683 | } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { | |||
684 | if (CI->getCalledOperand() != V) { | |||
685 | //cerr << "NONTRAPPING USE: " << *U; | |||
686 | return false; // Not calling the ptr | |||
687 | } | |||
688 | } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { | |||
689 | if (II->getCalledOperand() != V) { | |||
690 | //cerr << "NONTRAPPING USE: " << *U; | |||
691 | return false; // Not calling the ptr | |||
692 | } | |||
693 | } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { | |||
694 | if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; | |||
695 | } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { | |||
696 | if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; | |||
697 | } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { | |||
698 | // If we've already seen this phi node, ignore it, it has already been | |||
699 | // checked. | |||
700 | if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) | |||
701 | return false; | |||
702 | } else if (isa<ICmpInst>(U) && | |||
703 | !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) && | |||
704 | isa<LoadInst>(U->getOperand(0)) && | |||
705 | isa<ConstantPointerNull>(U->getOperand(1))) { | |||
706 | assert(isa<GlobalValue>( | |||
707 | cast<LoadInst>(U->getOperand(0))->getPointerOperand()) && | |||
708 | "Should be GlobalVariable"); | |||
709 | // This and only this kind of non-signed ICmpInst is to be replaced with | |||
710 | // the comparing of the value of the created global init bool later in | |||
711 | // optimizeGlobalAddressOfMalloc for the global variable. | |||
712 | } else { | |||
713 | //cerr << "NONTRAPPING USE: " << *U; | |||
714 | return false; | |||
715 | } | |||
716 | } | |||
717 | return true; | |||
718 | } | |||
719 | ||||
720 | /// Return true if all uses of any loads from GV will trap if the loaded value | |||
721 | /// is null. Note that this also permits comparisons of the loaded value | |||
722 | /// against null, as a special case. | |||
723 | static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { | |||
724 | for (const User *U : GV->users()) | |||
725 | if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { | |||
726 | SmallPtrSet<const PHINode*, 8> PHIs; | |||
727 | if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) | |||
728 | return false; | |||
729 | } else if (isa<StoreInst>(U)) { | |||
730 | // Ignore stores to the global. | |||
731 | } else { | |||
732 | // We don't know or understand this user, bail out. | |||
733 | //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; | |||
734 | return false; | |||
735 | } | |||
736 | return true; | |||
737 | } | |||
738 | ||||
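| /// Rewrite users of V that would trap if V were null (loads from V, stores to | |||
| /// V, calls and invokes through V, and casts or all-constant-index GEPs of V) | |||
| /// to use the non-null constant NewV instead. | |||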
739 | static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { | |||
740 | bool Changed = false; | |||
741 | for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) { | |||
742 | Instruction *I = cast<Instruction>(*UI++); | |||
743 | // Uses are non-trapping if null pointer is considered valid. | |||
744 | // Non address-space 0 globals are already pruned by the caller. | |||
745 | if (NullPointerIsDefined(I->getFunction())) | |||
746 | return false; | |||
747 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |||
748 | LI->setOperand(0, NewV); | |||
749 | Changed = true; | |||
750 | } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { | |||
751 | if (SI->getOperand(1) == V) { | |||
752 | SI->setOperand(1, NewV); | |||
753 | Changed = true; | |||
754 | } | |||
755 | } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { | |||
756 | CallBase *CB = cast<CallBase>(I); | |||
757 | if (CB->getCalledOperand() == V) { | |||
758 | // Calling through the pointer! Turn into a direct call, but be careful | |||
759 | // that the pointer is not also being passed as an argument. | |||
760 | CB->setCalledOperand(NewV); | |||
761 | Changed = true; | |||
762 | bool PassedAsArg = false; | |||
763 | for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) | |||
764 | if (CB->getArgOperand(i) == V) { | |||
765 | PassedAsArg = true; | |||
766 | CB->setArgOperand(i, NewV); | |||
767 | } | |||
768 | ||||
769 | if (PassedAsArg) { | |||
770 | // Being passed as an argument also. Be careful to not invalidate UI! | |||
771 | UI = V->user_begin(); | |||
772 | } | |||
773 | } | |||
774 | } else if (CastInst *CI = dyn_cast<CastInst>(I)) { | |||
775 | Changed |= OptimizeAwayTrappingUsesOfValue(CI, | |||
776 | ConstantExpr::getCast(CI->getOpcode(), | |||
777 | NewV, CI->getType())); | |||
778 | if (CI->use_empty()) { | |||
779 | Changed = true; | |||
780 | CI->eraseFromParent(); | |||
781 | } | |||
782 | } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { | |||
783 | // Should handle GEP here. | |||
784 | SmallVector<Constant*, 8> Idxs; | |||
785 | Idxs.reserve(GEPI->getNumOperands()-1); | |||
786 | for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); | |||
787 | i != e; ++i) | |||
788 | if (Constant *C = dyn_cast<Constant>(*i)) | |||
789 | Idxs.push_back(C); | |||
790 | else | |||
791 | break; | |||
792 | if (Idxs.size() == GEPI->getNumOperands()-1) | |||
793 | Changed |= OptimizeAwayTrappingUsesOfValue( | |||
794 | GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(), | |||
795 | NewV, Idxs)); | |||
796 | if (GEPI->use_empty()) { | |||
797 | Changed = true; | |||
798 | GEPI->eraseFromParent(); | |||
799 | } | |||
800 | } | |||
801 | } | |||
802 | ||||
803 | return Changed; | |||
804 | } | |||
805 | ||||
806 | /// The specified global has only one non-null value stored into it. If there | |||
807 | /// are uses of the loaded value that would trap if the loaded value is | |||
808 | /// dynamically null, then we know that they cannot be reachable with a null | |||
809 | /// value, so we can optimize away the load. | |||
810 | static bool OptimizeAwayTrappingUsesOfLoads( | |||
811 | GlobalVariable *GV, Constant *LV, const DataLayout &DL, | |||
812 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
813 | bool Changed = false; | |||
814 | ||||
815 | // Keep track of whether we are able to remove all the uses of the global | |||
816 | // other than the store that defines it. | |||
817 | bool AllNonStoreUsesGone = true; | |||
818 | ||||
819 | // Replace all uses of loads with uses of uses of the stored value. | |||
820 | for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){ | |||
821 | User *GlobalUser = *GUI++; | |||
822 | if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { | |||
823 | Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); | |||
824 | // If we were able to delete all uses of the loads | |||
825 | if (LI->use_empty()) { | |||
826 | LI->eraseFromParent(); | |||
827 | Changed = true; | |||
828 | } else { | |||
829 | AllNonStoreUsesGone = false; | |||
830 | } | |||
831 | } else if (isa<StoreInst>(GlobalUser)) { | |||
832 | // Ignore the store that stores "LV" to the global. | |||
833 | assert(GlobalUser->getOperand(1) == GV && | |||
834 | "Must be storing *to* the global"); | |||
835 | } else { | |||
836 | AllNonStoreUsesGone = false; | |||
837 | ||||
838 | // If we get here we could have other crazy uses that are transitively | |||
839 | // loaded. | |||
840 | assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || | |||
841 | isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || | |||
842 | isa<BitCastInst>(GlobalUser) || | |||
843 | isa<GetElementPtrInst>(GlobalUser)) && | |||
844 | "Only expect load and stores!"); | |||
845 | } | |||
846 | } | |||
847 | ||||
848 | if (Changed) { | |||
849 | LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV | |||
850 | << "\n"); | |||
851 | ++NumGlobUses; | |||
852 | } | |||
853 | ||||
854 | // If we nuked all of the loads, then none of the stores are needed either, | |||
855 | // nor is the global. | |||
856 | if (AllNonStoreUsesGone) { | |||
857 | if (isLeakCheckerRoot(GV)) { | |||
858 | Changed |= CleanupPointerRootUsers(GV, GetTLI); | |||
859 | } else { | |||
860 | Changed = true; | |||
861 | CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI); | |||
862 | } | |||
863 | if (GV->use_empty()) { | |||
864 | LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); | |||
865 | Changed = true; | |||
866 | GV->eraseFromParent(); | |||
867 | ++NumDeleted; | |||
868 | } | |||
869 | } | |||
870 | return Changed; | |||
871 | } | |||
872 | ||||
873 | /// Walk the use list of V, constant folding all of the instructions that are | |||
874 | /// foldable. | |||
875 | static void ConstantPropUsersOf(Value *V, const DataLayout &DL, | |||
876 | TargetLibraryInfo *TLI) { | |||
877 | for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) | |||
878 | if (Instruction *I = dyn_cast<Instruction>(*UI++)) | |||
879 | if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) { | |||
880 | I->replaceAllUsesWith(NewC); | |||
881 | ||||
882 | // Advance UI to the next non-I use to avoid invalidating it! | |||
883 | // Instructions could multiply use V. | |||
884 | while (UI != E && *UI == I) | |||
885 | ++UI; | |||
886 | if (isInstructionTriviallyDead(I, TLI)) | |||
887 | I->eraseFromParent(); | |||
888 | } | |||
889 | } | |||
890 | ||||
891 | /// This function takes the specified global variable, and transforms the | |||
892 | /// program as if it always contained the result of the specified malloc. | |||
893 | /// Because it is always the result of the specified malloc, there is no reason | |||
894 | /// to actually DO the malloc. Instead, turn the malloc into a global, and any | |||
895 | /// loads of GV as uses of the new global. | |||
896 | static GlobalVariable * | |||
897 | OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, | |||
898 | ConstantInt *NElements, const DataLayout &DL, | |||
899 | TargetLibraryInfo *TLI) { | |||
900 | LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI | |||
901 | << '\n'); | |||
902 | ||||
903 | Type *GlobalType; | |||
904 | if (NElements->getZExtValue() == 1) | |||
905 | GlobalType = AllocTy; | |||
906 | else | |||
907 | // If we have an array allocation, the global variable is of an array. | |||
908 | GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); | |||
909 | ||||
910 | // Create the new global variable. The contents of the malloc'd memory is | |||
911 | // undefined, so initialize with an undef value. | |||
912 | GlobalVariable *NewGV = new GlobalVariable( | |||
913 | *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, | |||
914 | UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, | |||
915 | GV->getThreadLocalMode()); | |||
916 | ||||
917 | // If there are bitcast users of the malloc (which is typical, usually we have | |||
918 | // a malloc + bitcast) then replace them with uses of the new global. Update | |||
919 | // other users to use the global as well. | |||
920 | BitCastInst *TheBC = nullptr; | |||
921 | while (!CI->use_empty()) { | |||
922 | Instruction *User = cast<Instruction>(CI->user_back()); | |||
923 | if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { | |||
924 | if (BCI->getType() == NewGV->getType()) { | |||
925 | BCI->replaceAllUsesWith(NewGV); | |||
926 | BCI->eraseFromParent(); | |||
927 | } else { | |||
928 | BCI->setOperand(0, NewGV); | |||
929 | } | |||
930 | } else { | |||
931 | if (!TheBC) | |||
932 | TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); | |||
933 | User->replaceUsesOfWith(CI, TheBC); | |||
934 | } | |||
935 | } | |||
936 | ||||
937 | Constant *RepValue = NewGV; | |||
938 | if (NewGV->getType() != GV->getValueType()) | |||
939 | RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType()); | |||
940 | ||||
941 | // If there is a comparison against null, we will insert a global bool to | |||
942 | // keep track of whether the global was initialized yet or not. | |||
943 | GlobalVariable *InitBool = | |||
944 | new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, | |||
945 | GlobalValue::InternalLinkage, | |||
946 | ConstantInt::getFalse(GV->getContext()), | |||
947 | GV->getName()+".init", GV->getThreadLocalMode()); | |||
948 | bool InitBoolUsed = false; | |||
949 | ||||
950 | // Loop over all uses of GV, processing them in turn. | |||
951 | while (!GV->use_empty()) { | |||
952 | if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { | |||
953 | // The global is initialized when the store to it occurs. If the stored | |||
954 | // value is null value, the global bool is set to false, otherwise true. | |||
955 | new StoreInst(ConstantInt::getBool( | |||
956 | GV->getContext(), | |||
957 | !isa<ConstantPointerNull>(SI->getValueOperand())), | |||
958 | InitBool, false, Align(1), SI->getOrdering(), | |||
959 | SI->getSyncScopeID(), SI); | |||
960 | SI->eraseFromParent(); | |||
961 | continue; | |||
962 | } | |||
963 | ||||
964 | LoadInst *LI = cast<LoadInst>(GV->user_back()); | |||
965 | while (!LI->use_empty()) { | |||
966 | Use &LoadUse = *LI->use_begin(); | |||
967 | ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); | |||
968 | if (!ICI) { | |||
969 | LoadUse = RepValue; | |||
970 | continue; | |||
971 | } | |||
972 | ||||
973 | // Replace the cmp X, 0 with a use of the bool value. | |||
974 | Value *LV = new LoadInst(InitBool->getValueType(), InitBool, | |||
975 | InitBool->getName() + ".val", false, Align(1), | |||
976 | LI->getOrdering(), LI->getSyncScopeID(), LI); | |||
977 | InitBoolUsed = true; | |||
978 | switch (ICI->getPredicate()) { | |||
979 | default: llvm_unreachable("Unknown ICmp Predicate!"); | |||
980 | case ICmpInst::ICMP_ULT: // X < null -> always false | |||
981 | LV = ConstantInt::getFalse(GV->getContext()); | |||
982 | break; | |||
983 | case ICmpInst::ICMP_UGE: // X >= null -> always true | |||
984 | LV = ConstantInt::getTrue(GV->getContext()); | |||
985 | break; | |||
986 | case ICmpInst::ICMP_ULE: | |||
987 | case ICmpInst::ICMP_EQ: | |||
988 | LV = BinaryOperator::CreateNot(LV, "notinit", ICI); | |||
989 | break; | |||
990 | case ICmpInst::ICMP_NE: | |||
991 | case ICmpInst::ICMP_UGT: | |||
992 | break; // no change. | |||
993 | } | |||
994 | ICI->replaceAllUsesWith(LV); | |||
995 | ICI->eraseFromParent(); | |||
996 | } | |||
997 | LI->eraseFromParent(); | |||
998 | } | |||
999 | ||||
1000 | // If the initialization boolean was used, insert it, otherwise delete it. | |||
1001 | if (!InitBoolUsed) { | |||
1002 | while (!InitBool->use_empty()) // Delete initializations | |||
1003 | cast<StoreInst>(InitBool->user_back())->eraseFromParent(); | |||
1004 | delete InitBool; | |||
1005 | } else | |||
1006 | GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); | |||
1007 | ||||
1008 | // Now the GV is dead, nuke it and the malloc.. | |||
1009 | GV->eraseFromParent(); | |||
1010 | CI->eraseFromParent(); | |||
1011 | ||||
1012 | // To further other optimizations, loop over all users of NewGV and try to | |||
1013 | // constant prop them. This will promote GEP instructions with constant | |||
1014 | // indices into GEP constant-exprs, which will allow global-opt to hack on it. | |||
1015 | ConstantPropUsersOf(NewGV, DL, TLI); | |||
1016 | if (RepValue != NewGV) | |||
1017 | ConstantPropUsersOf(RepValue, DL, TLI); | |||
1018 | ||||
1019 | return NewGV; | |||
1020 | } | |||
1021 | ||||
1022 | /// Scan the use-list of V checking to make sure that there are no complex uses | |||
1023 | /// of V. We permit simple things like dereferencing the pointer, but not | |||
1024 | /// storing through the address, unless it is to the specified global. | |||
1025 | static bool | |||
1026 | valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, | |||
1027 | const GlobalVariable *GV) { | |||
1028 | for (const User *U : V->users()) { | |||
1029 | const Instruction *Inst = cast<Instruction>(U); | |||
1030 | ||||
1031 | if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { | |||
1032 | continue; // Fine, ignore. | |||
1033 | } | |||
1034 | ||||
1035 | if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { | |||
1036 | if (SI->getOperand(0) == V && SI->getOperand(1) != GV) | |||
1037 | return false; // Storing the pointer itself... bad. | |||
1038 | continue; // Otherwise, storing through it, or storing into GV... fine. | |||
1039 | } | |||
1040 | ||||
1041 | if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { | |||
1042 | if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV)) | |||
1043 | return false; | |||
1044 | continue; | |||
1045 | } | |||
1046 | ||||
1047 | return false; | |||
1048 | } | |||
1049 | return true; | |||
1050 | } | |||
1051 | ||||
1052 | /// This function is called when we see a pointer global variable with a single | |||
1053 | /// value stored into it that is a malloc or cast of malloc. | |||
1054 | static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, | |||
1055 | Type *AllocTy, | |||
1056 | AtomicOrdering Ordering, | |||
1057 | const DataLayout &DL, | |||
1058 | TargetLibraryInfo *TLI) { | |||
1059 | // If this is a malloc of an abstract type, don't touch it. | |||
1060 | if (!AllocTy->isSized()) | |||
1061 | return false; | |||
1062 | ||||
1063 | // We can't optimize this global unless all uses of it are *known* to be | |||
1064 | // of the malloc value, not of the null initializer value (consider a use | |||
1065 | // that compares the global's value against zero to see if the malloc has | |||
1066 | // been reached). To do this, we check to see if all uses of the global | |||
1067 | // would trap if the global were null: this proves that they must all | |||
1068 | // happen after the malloc. | |||
1069 | if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) | |||
1070 | return false; | |||
1071 | ||||
1072 | // We can't optimize this if the malloc itself is used in a complex way, | |||
1073 | // for example, being stored into multiple globals. This allows the | |||
1074 | // malloc to be stored into the specified global, loaded, and icmp'd. | |||
1075 | // These are all things we could transform to using the global for. | |||
1076 | if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV)) | |||
1077 | return false; | |||
1078 | ||||
1079 | // If we have a global that is only initialized with a fixed size malloc, | |||
1080 | // transform the program to use global memory instead of malloc'd memory. | |||
1081 | // This eliminates dynamic allocation, avoids an indirection accessing the | |||
1082 | // data, and exposes the resultant global to further GlobalOpt. | |||
1083 | // We cannot optimize the malloc if we cannot determine malloc array size. | |||
1084 | Value *NElems = getMallocArraySize(CI, DL, TLI, true); | |||
1085 | if (!NElems) | |||
1086 | return false; | |||
1087 | ||||
1088 | if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) | |||
1089 | // Restrict this transformation to only working on small allocations | |||
1090 | // (2048 bytes currently), as we don't want to introduce a 16M global or | |||
1091 | // something. | |||
1092 | if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { | |||
1093 | OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); | |||
1094 | return true; | |||
1095 | } | |||
1096 | ||||
1097 | return false; | |||
1098 | } | |||
1099 | ||||
1100 | // Try to optimize globals based on the knowledge that only one value (besides | |||
1101 | // its initializer) is ever stored to the global. | |||
1102 | static bool | |||
1103 | optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, | |||
1104 | AtomicOrdering Ordering, const DataLayout &DL, | |||
1105 | function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
1106 | // Ignore no-op GEPs and bitcasts. | |||
1107 | StoredOnceVal = StoredOnceVal->stripPointerCasts(); | |||
1108 | ||||
1109 | // If we are dealing with a pointer global that is initialized to null and | |||
1110 | // only has one (non-null) value stored into it, then we can optimize any | |||
1111 | // users of the loaded value (often calls and loads) that would trap if the | |||
1112 | // value was null. | |||
1113 | if (GV->getInitializer()->getType()->isPointerTy() && | |||
1114 | GV->getInitializer()->isNullValue() && | |||
1115 | !NullPointerIsDefined( | |||
1116 | nullptr /* F */, | |||
1117 | GV->getInitializer()->getType()->getPointerAddressSpace())) { | |||
1118 | if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { | |||
1119 | if (GV->getInitializer()->getType() != SOVC->getType()) | |||
1120 | SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); | |||
1121 | ||||
1122 | // Optimize away any trapping uses of the loaded value. | |||
1123 | if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) | |||
1124 | return true; | |||
1125 | } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) { | |||
1126 | auto *TLI = &GetTLI(*CI->getFunction()); | |||
1127 | Type *MallocType = getMallocAllocatedType(CI, TLI); | |||
1128 | if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, | |||
1129 | Ordering, DL, TLI)) | |||
1130 | return true; | |||
1131 | } | |||
1132 | } | |||
1133 | ||||
1134 | return false; | |||
1135 | } | |||
1136 | ||||
1137 | /// At this point, we have learned that the only two values ever stored into GV | |||
1138 | /// are its initializer and OtherVal. See if we can shrink the global into a | |||
1139 | /// boolean and select between the two values whenever it is used. This exposes | |||
1140 | /// the values to other scalar optimizations. | |||
1141 | static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { | |||
1142 | Type *GVElType = GV->getValueType(); | |||
1143 | ||||
1144 | // If GVElType is already i1, it is already shrunk. If the type of the GV is | |||
1145 | // an FP value, pointer or vector, don't do this optimization because a select | |||
1146 | // between them is very expensive and unlikely to lead to later | |||
1147 | // simplification. In these cases, we typically end up with "cond ? v1 : v2" | |||
1148 | // where v1 and v2 both require constant pool loads, a big loss. | |||
1149 | if (GVElType == Type::getInt1Ty(GV->getContext()) || | |||
1150 | GVElType->isFloatingPointTy() || | |||
1151 | GVElType->isPointerTy() || GVElType->isVectorTy()) | |||
1152 | return false; | |||
1153 | ||||
1154 | // Walk the use list of the global seeing if all the uses are load or store. | |||
1155 | // If there is anything else, bail out. | |||
1156 | for (User *U : GV->users()) | |||
1157 | if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) | |||
1158 | return false; | |||
1159 | ||||
1160 | LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); | |||
1161 | ||||
1162 | // Create the new global, initializing it to false. | |||
1163 | GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), | |||
1164 | false, | |||
1165 | GlobalValue::InternalLinkage, | |||
1166 | ConstantInt::getFalse(GV->getContext()), | |||
1167 | GV->getName()+".b", | |||
1168 | GV->getThreadLocalMode(), | |||
1169 | GV->getType()->getAddressSpace()); | |||
1170 | NewGV->copyAttributesFrom(GV); | |||
1171 | GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); | |||
1172 | ||||
1173 | Constant *InitVal = GV->getInitializer(); | |||
1174 | assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&((void)0) | |||
1175 | "No reason to shrink to bool!")((void)0); | |||
1176 | ||||
1177 | SmallVector<DIGlobalVariableExpression *, 1> GVs; | |||
1178 | GV->getDebugInfo(GVs); | |||
1179 | ||||
1180 | // If initialized to zero and storing one into the global, we can use a cast | |||
1181 | // instead of a select to synthesize the desired value. | |||
1182 | bool IsOneZero = false; | |||
1183 | bool EmitOneOrZero = true; | |||
1184 | auto *CI = dyn_cast<ConstantInt>(OtherVal); | |||
1185 | if (CI && CI->getValue().getActiveBits() <= 64) { | |||
1186 | IsOneZero = InitVal->isNullValue() && CI->isOne(); | |||
1187 | ||||
1188 | auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer()); | |||
1189 | if (CIInit && CIInit->getValue().getActiveBits() <= 64) { | |||
1190 | uint64_t ValInit = CIInit->getZExtValue(); | |||
1191 | uint64_t ValOther = CI->getZExtValue(); | |||
1192 | uint64_t ValMinus = ValOther - ValInit; | |||
1193 | ||||
1194 | for(auto *GVe : GVs){ | |||
1195 | DIGlobalVariable *DGV = GVe->getVariable(); | |||
1196 | DIExpression *E = GVe->getExpression(); | |||
1197 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1198 | unsigned SizeInOctets = | |||
1199 | DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8; | |||
1200 | ||||
1201 |         // It is expected that the address of the optimized global variable is | |||
1202 |         // on top of the DWARF expression stack. After the optimization, the | |||
1203 |         // value of that variable will be either 0 (the initial value) or 1 (the | |||
1204 |         // other value). The following expression should return the constant | |||
1205 |         // integer value depending on the value at the global object's address: | |||
1206 | // val * (ValOther - ValInit) + ValInit: | |||
1207 | // DW_OP_deref DW_OP_constu <ValMinus> | |||
1208 | // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value | |||
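        // Worked example with illustrative values: if ValInit is 5 and
        // ValOther is 12, then ValMinus is 7 and the expression evaluates to
        // 0 * 7 + 5 = 5 or 1 * 7 + 5 = 12, recovering the original value from
        // the i1 now stored in the shrunken global.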
1209 | SmallVector<uint64_t, 12> Ops = { | |||
1210 | dwarf::DW_OP_deref_size, SizeInOctets, | |||
1211 | dwarf::DW_OP_constu, ValMinus, | |||
1212 | dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit, | |||
1213 | dwarf::DW_OP_plus}; | |||
1214 | bool WithStackValue = true; | |||
1215 | E = DIExpression::prependOpcodes(E, Ops, WithStackValue); | |||
1216 | DIGlobalVariableExpression *DGVE = | |||
1217 | DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E); | |||
1218 | NewGV->addDebugInfo(DGVE); | |||
1219 | } | |||
1220 | EmitOneOrZero = false; | |||
1221 | } | |||
1222 | } | |||
1223 | ||||
1224 | if (EmitOneOrZero) { | |||
1225 |     // FIXME: This only emits the variable's address for the debugger; the | |||
1226 |     // location will now only ever hold 0 or 1. | |||
1227 | for(auto *GV : GVs) | |||
1228 | NewGV->addDebugInfo(GV); | |||
1229 | } | |||
1230 | ||||
1231 | while (!GV->use_empty()) { | |||
1232 | Instruction *UI = cast<Instruction>(GV->user_back()); | |||
1233 | if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { | |||
1234 | // Change the store into a boolean store. | |||
1235 | bool StoringOther = SI->getOperand(0) == OtherVal; | |||
1236 | // Only do this if we weren't storing a loaded value. | |||
1237 | Value *StoreVal; | |||
1238 | if (StoringOther || SI->getOperand(0) == InitVal) { | |||
1239 | StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), | |||
1240 | StoringOther); | |||
1241 | } else { | |||
1242 | // Otherwise, we are storing a previously loaded copy. To do this, | |||
1243 | // change the copy from copying the original value to just copying the | |||
1244 | // bool. | |||
1245 | Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); | |||
1246 | ||||
1247 | // If we've already replaced the input, StoredVal will be a cast or | |||
1248 | // select instruction. If not, it will be a load of the original | |||
1249 | // global. | |||
1250 | if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { | |||
1251 | assert(LI->getOperand(0) == GV && "Not a copy!")((void)0); | |||
1252 | // Insert a new load, to preserve the saved value. | |||
1253 | StoreVal = new LoadInst(NewGV->getValueType(), NewGV, | |||
1254 | LI->getName() + ".b", false, Align(1), | |||
1255 | LI->getOrdering(), LI->getSyncScopeID(), LI); | |||
1256 | } else { | |||
1257 | assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&((void)0) | |||
1258 | "This is not a form that we understand!")((void)0); | |||
1259 | StoreVal = StoredVal->getOperand(0); | |||
1260 | assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!")((void)0); | |||
1261 | } | |||
1262 | } | |||
1263 | StoreInst *NSI = | |||
1264 | new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(), | |||
1265 | SI->getSyncScopeID(), SI); | |||
1266 | NSI->setDebugLoc(SI->getDebugLoc()); | |||
1267 | } else { | |||
1268 | // Change the load into a load of bool then a select. | |||
1269 | LoadInst *LI = cast<LoadInst>(UI); | |||
1270 | LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV, | |||
1271 | LI->getName() + ".b", false, Align(1), | |||
1272 | LI->getOrdering(), LI->getSyncScopeID(), LI); | |||
1273 | Instruction *NSI; | |||
1274 | if (IsOneZero) | |||
1275 | NSI = new ZExtInst(NLI, LI->getType(), "", LI); | |||
1276 | else | |||
1277 | NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); | |||
1278 | NSI->takeName(LI); | |||
1279 |       // Since LI is split into two instructions, NLI and NSI both inherit the | |||
1280 |       // same DebugLoc. | |||
1281 | NLI->setDebugLoc(LI->getDebugLoc()); | |||
1282 | NSI->setDebugLoc(LI->getDebugLoc()); | |||
1283 | LI->replaceAllUsesWith(NSI); | |||
1284 | } | |||
1285 | UI->eraseFromParent(); | |||
1286 | } | |||
1287 | ||||
1288 | // Retain the name of the old global variable. People who are debugging their | |||
1289 | // programs may expect these variables to be named the same. | |||
1290 | NewGV->takeName(GV); | |||
1291 | GV->eraseFromParent(); | |||
1292 | return true; | |||
1293 | } | |||
1294 | ||||
1295 | static bool deleteIfDead( | |||
1296 | GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { | |||
1297 | GV.removeDeadConstantUsers(); | |||
1298 | ||||
1299 | if (!GV.isDiscardableIfUnused() && !GV.isDeclaration()) | |||
1300 | return false; | |||
1301 | ||||
1302 | if (const Comdat *C = GV.getComdat()) | |||
1303 | if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) | |||
1304 | return false; | |||
1305 | ||||
1306 | bool Dead; | |||
1307 | if (auto *F = dyn_cast<Function>(&GV)) | |||
1308 | Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead(); | |||
1309 | else | |||
1310 | Dead = GV.use_empty(); | |||
1311 | if (!Dead) | |||
1312 | return false; | |||
1313 | ||||
1314 | LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n")do { } while (false); | |||
1315 | GV.eraseFromParent(); | |||
1316 | ++NumDeleted; | |||
1317 | return true; | |||
1318 | } | |||
1319 | ||||
1320 | static bool isPointerValueDeadOnEntryToFunction( | |||
1321 | const Function *F, GlobalValue *GV, | |||
1322 | function_ref<DominatorTree &(Function &)> LookupDomTree) { | |||
1323 |   // Find all uses of GV. We expect them all to be in F, and we bail out if | |||
1324 |   // there is any use we cannot identify. | |||
1325 | // | |||
1326 | // On each of these uses, identify if the memory that GV points to is | |||
1327 | // used/required/live at the start of the function. If it is not, for example | |||
1328 | // if the first thing the function does is store to the GV, the GV can | |||
1329 | // possibly be demoted. | |||
1330 | // | |||
1331 | // We don't do an exhaustive search for memory operations - simply look | |||
1332 | // through bitcasts as they're quite common and benign. | |||
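  // Illustrative sketch (hypothetical IR): in
  //   define internal void @f() {
  //     store i32 0, i32* @g     ; @g is overwritten before it is ever read
  //     %v = load i32, i32* @g
  //     ...
  //   }
  // every load of @g is dominated by a store of at least as many bits, so the
  // value @g holds on entry to @f is dead and the global may be demoted.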
1333 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1334 | SmallVector<LoadInst *, 4> Loads; | |||
1335 | SmallVector<StoreInst *, 4> Stores; | |||
1336 | for (auto *U : GV->users()) { | |||
1337 | if (Operator::getOpcode(U) == Instruction::BitCast) { | |||
1338 | for (auto *UU : U->users()) { | |||
1339 | if (auto *LI = dyn_cast<LoadInst>(UU)) | |||
1340 | Loads.push_back(LI); | |||
1341 | else if (auto *SI = dyn_cast<StoreInst>(UU)) | |||
1342 | Stores.push_back(SI); | |||
1343 | else | |||
1344 | return false; | |||
1345 | } | |||
1346 | continue; | |||
1347 | } | |||
1348 | ||||
1349 | Instruction *I = dyn_cast<Instruction>(U); | |||
1350 | if (!I) | |||
1351 | return false; | |||
1352 | assert(I->getParent()->getParent() == F)((void)0); | |||
1353 | ||||
1354 | if (auto *LI = dyn_cast<LoadInst>(I)) | |||
1355 | Loads.push_back(LI); | |||
1356 | else if (auto *SI = dyn_cast<StoreInst>(I)) | |||
1357 | Stores.push_back(SI); | |||
1358 | else | |||
1359 | return false; | |||
1360 | } | |||
1361 | ||||
1362 |   // We have classified all uses of GV as loads and stores. Now check if all | |||
1363 | // of them are known not to depend on the value of the global at the function | |||
1364 | // entry point. We do this by ensuring that every load is dominated by at | |||
1365 | // least one store. | |||
1366 | auto &DT = LookupDomTree(*const_cast<Function *>(F)); | |||
1367 | ||||
1368 | // The below check is quadratic. Check we're not going to do too many tests. | |||
1369 | // FIXME: Even though this will always have worst-case quadratic time, we | |||
1370 | // could put effort into minimizing the average time by putting stores that | |||
1371 | // have been shown to dominate at least one load at the beginning of the | |||
1372 | // Stores array, making subsequent dominance checks more likely to succeed | |||
1373 | // early. | |||
1374 | // | |||
1375 | // The threshold here is fairly large because global->local demotion is a | |||
1376 | // very powerful optimization should it fire. | |||
1377 | const unsigned Threshold = 100; | |||
1378 | if (Loads.size() * Stores.size() > Threshold) | |||
1379 | return false; | |||
1380 | ||||
1381 | for (auto *L : Loads) { | |||
1382 | auto *LTy = L->getType(); | |||
1383 | if (none_of(Stores, [&](const StoreInst *S) { | |||
1384 | auto *STy = S->getValueOperand()->getType(); | |||
1385 | // The load is only dominated by the store if DomTree says so | |||
1386 | // and the number of bits loaded in L is less than or equal to | |||
1387 | // the number of bits stored in S. | |||
1388 | return DT.dominates(S, L) && | |||
1389 | DL.getTypeStoreSize(LTy).getFixedSize() <= | |||
1390 | DL.getTypeStoreSize(STy).getFixedSize(); | |||
1391 | })) | |||
1392 | return false; | |||
1393 | } | |||
1394 | // All loads have known dependences inside F, so the global can be localized. | |||
1395 | return true; | |||
1396 | } | |||
1397 | ||||
1398 | /// C may have non-instruction users. Can all of those users be turned into | |||
1399 | /// instructions? | |||
1400 | static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { | |||
1401 | // We don't do this exhaustively. The most common pattern that we really need | |||
1402 |   // to care about is a constant GEP or constant bitcast - so we just look | |||
1403 |   // through a single level of ConstantExpr. | |||
1404 | // | |||
1405 | // The set of constants that this function returns true for must be able to be | |||
1406 | // handled by makeAllConstantUsesInstructions. | |||
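// For instance (hypothetical IR), a load whose pointer operand is the
// constant expression
//   getelementptr inbounds ([4 x i32], [4 x i32]* @g, i64 0, i64 1)
// is acceptable: makeAllConstantUsesInstructions can rewrite it as an
// equivalent GEP instruction. A constantexpr whose own user is another
// constant is rejected here, since we do not recurse any further.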
1407 | for (auto *U : C->users()) { | |||
1408 | if (isa<Instruction>(U)) | |||
1409 | continue; | |||
1410 | if (!isa<ConstantExpr>(U)) | |||
1411 |       // Non-instruction, non-constantexpr user; cannot convert this. | |||
1412 | return false; | |||
1413 | for (auto *UU : U->users()) | |||
1414 | if (!isa<Instruction>(UU)) | |||
1415 |         // A constantexpr used by another constant. We don't try to recurse any | |||
1416 |         // further but just bail out at this point. | |||
1417 | return false; | |||
1418 | } | |||
1419 | ||||
1420 | return true; | |||
1421 | } | |||
1422 | ||||
1423 | /// C may have non-instruction users, and | |||
1424 | /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the | |||
1425 | /// non-instruction users to instructions. | |||
1426 | static void makeAllConstantUsesInstructions(Constant *C) { | |||
1427 | SmallVector<ConstantExpr*,4> Users; | |||
1428 | for (auto *U : C->users()) { | |||
1429 | if (isa<ConstantExpr>(U)) | |||
1430 | Users.push_back(cast<ConstantExpr>(U)); | |||
1431 | else | |||
1432 | // We should never get here; allNonInstructionUsersCanBeMadeInstructions | |||
1433 | // should not have returned true for C. | |||
1434 | assert(((void)0) | |||
1435 | isa<Instruction>(U) &&((void)0) | |||
1436 | "Can't transform non-constantexpr non-instruction to instruction!")((void)0); | |||
1437 | } | |||
1438 | ||||
1439 | SmallVector<Value*,4> UUsers; | |||
1440 | for (auto *U : Users) { | |||
1441 | UUsers.clear(); | |||
1442 | append_range(UUsers, U->users()); | |||
1443 | for (auto *UU : UUsers) { | |||
1444 | Instruction *UI = cast<Instruction>(UU); | |||
1445 | Instruction *NewU = U->getAsInstruction(); | |||
1446 | NewU->insertBefore(UI); | |||
1447 | UI->replaceUsesOfWith(U, NewU); | |||
1448 | } | |||
1449 | // We've replaced all the uses, so destroy the constant. (destroyConstant | |||
1450 | // will update value handles and metadata.) | |||
1451 | U->destroyConstant(); | |||
1452 | } | |||
1453 | } | |||
1454 | ||||
1455 | /// Analyze the specified global variable and optimize | |||
1456 | /// it if possible. If we make a change, return true. | |||
1457 | static bool | |||
1458 | processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, | |||
1459 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
1460 | function_ref<DominatorTree &(Function &)> LookupDomTree) { | |||
1461 | auto &DL = GV->getParent()->getDataLayout(); | |||
1462 | // If this is a first class global and has only one accessing function and | |||
1463 | // this function is non-recursive, we replace the global with a local alloca | |||
1464 | // in this function. | |||
1465 | // | |||
1466 |   // NOTE: It doesn't make sense to promote non-single-value types since we | |||
1467 |   // are just replacing static memory with stack memory. | |||
1468 |   // | |||
1469 |   // If the global is in a different address space, don't bring it to the stack. | |||
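  // Rough sketch of the transformation (hypothetical IR): an i32 global
  //   @counter = internal global i32 0
  // accessed only from one non-recursive function, and dead on entry to it,
  // is replaced at the start of that function's entry block with
  //   %counter = alloca i32
  //   store i32 0, i32* %counter
  // and all uses of @counter are rewritten to use the alloca.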
1470 | if (!GS.HasMultipleAccessingFunctions && | |||
1471 | GS.AccessingFunction && | |||
1472 | GV->getValueType()->isSingleValueType() && | |||
1473 | GV->getType()->getAddressSpace() == 0 && | |||
1474 | !GV->isExternallyInitialized() && | |||
1475 | allNonInstructionUsersCanBeMadeInstructions(GV) && | |||
1476 | GS.AccessingFunction->doesNotRecurse() && | |||
1477 | isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV, | |||
1478 | LookupDomTree)) { | |||
1479 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1480 | ||||
1481 | LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n")do { } while (false); | |||
1482 | Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction | |||
1483 | ->getEntryBlock().begin()); | |||
1484 | Type *ElemTy = GV->getValueType(); | |||
1485 | // FIXME: Pass Global's alignment when globals have alignment | |||
1486 | AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr, | |||
1487 | GV->getName(), &FirstI); | |||
1488 | if (!isa<UndefValue>(GV->getInitializer())) | |||
1489 | new StoreInst(GV->getInitializer(), Alloca, &FirstI); | |||
1490 | ||||
1491 | makeAllConstantUsesInstructions(GV); | |||
1492 | ||||
1493 | GV->replaceAllUsesWith(Alloca); | |||
1494 | GV->eraseFromParent(); | |||
1495 | ++NumLocalized; | |||
1496 | return true; | |||
1497 | } | |||
1498 | ||||
1499 | bool Changed = false; | |||
1500 | ||||
1501 | // If the global is never loaded (but may be stored to), it is dead. | |||
1502 | // Delete it now. | |||
1503 | if (!GS.IsLoaded) { | |||
1504 | LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n")do { } while (false); | |||
1505 | ||||
1506 | if (isLeakCheckerRoot(GV)) { | |||
1507 | // Delete any constant stores to the global. | |||
1508 | Changed = CleanupPointerRootUsers(GV, GetTLI); | |||
1509 | } else { | |||
1510 | // Delete any stores we can find to the global. We may not be able to | |||
1511 | // make it completely dead though. | |||
1512 | Changed = | |||
1513 | CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); | |||
1514 | } | |||
1515 | ||||
1516 | // If the global is dead now, delete it. | |||
1517 | if (GV->use_empty()) { | |||
1518 | GV->eraseFromParent(); | |||
1519 | ++NumDeleted; | |||
1520 | Changed = true; | |||
1521 | } | |||
1522 | return Changed; | |||
1523 | ||||
1524 | } | |||
1525 | if (GS.StoredType <= GlobalStatus::InitializerStored) { | |||
1526 | LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n")do { } while (false); | |||
1527 | ||||
1528 | // Don't actually mark a global constant if it's atomic because atomic loads | |||
1529 | // are implemented by a trivial cmpxchg in some edge-cases and that usually | |||
1530 | // requires write access to the variable even if it's not actually changed. | |||
1531 | if (GS.Ordering == AtomicOrdering::NotAtomic) { | |||
1532 | assert(!GV->isConstant() && "Expected a non-constant global")((void)0); | |||
1533 | GV->setConstant(true); | |||
1534 | Changed = true; | |||
1535 | } | |||
1536 | ||||
1537 | // Clean up any obviously simplifiable users now. | |||
1538 | Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); | |||
1539 | ||||
1540 | // If the global is dead now, just nuke it. | |||
1541 | if (GV->use_empty()) { | |||
1542 | LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify "do { } while (false) | |||
1543 | << "all users and delete global!\n")do { } while (false); | |||
1544 | GV->eraseFromParent(); | |||
1545 | ++NumDeleted; | |||
1546 | return true; | |||
1547 | } | |||
1548 | ||||
1549 | // Fall through to the next check; see if we can optimize further. | |||
1550 | ++NumMarked; | |||
1551 | } | |||
1552 | if (!GV->getInitializer()->getType()->isSingleValueType()) { | |||
1553 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
1554 | if (SRAGlobal(GV, DL)) | |||
1555 | return true; | |||
1556 | } | |||
1557 | if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { | |||
1558 | // If the initial value for the global was an undef value, and if only | |||
1559 | // one other value was stored into it, we can just change the | |||
1560 | // initializer to be the stored value, then delete all stores to the | |||
1561 | // global. This allows us to mark it constant. | |||
1562 | if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) | |||
1563 | if (isa<UndefValue>(GV->getInitializer())) { | |||
1564 | // Change the initial value here. | |||
1565 | GV->setInitializer(SOVConstant); | |||
1566 | ||||
1567 | // Clean up any obviously simplifiable users now. | |||
1568 | CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); | |||
1569 | ||||
1570 | if (GV->use_empty()) { | |||
1571 | LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "do { } while (false) | |||
1572 | << "simplify all users and delete global!\n")do { } while (false); | |||
1573 | GV->eraseFromParent(); | |||
1574 | ++NumDeleted; | |||
1575 | } | |||
1576 | ++NumSubstitute; | |||
1577 | return true; | |||
1578 | } | |||
1579 | ||||
1580 | // Try to optimize globals based on the knowledge that only one value | |||
1581 | // (besides its initializer) is ever stored to the global. | |||
1582 | if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, | |||
1583 | GetTLI)) | |||
1584 | return true; | |||
1585 | ||||
1586 | // Otherwise, if the global was not a boolean, we can shrink it to be a | |||
1587 | // boolean. | |||
1588 | if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) { | |||
1589 | if (GS.Ordering == AtomicOrdering::NotAtomic) { | |||
1590 | if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { | |||
1591 | ++NumShrunkToBool; | |||
1592 | return true; | |||
1593 | } | |||
1594 | } | |||
1595 | } | |||
1596 | } | |||
1597 | ||||
1598 | return Changed; | |||
1599 | } | |||
1600 | ||||
1601 | /// Analyze the specified global variable and optimize it if possible. If we | |||
1602 | /// make a change, return true. | |||
1603 | static bool | |||
1604 | processGlobal(GlobalValue &GV, | |||
1605 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
1606 | function_ref<DominatorTree &(Function &)> LookupDomTree) { | |||
1607 | if (GV.getName().startswith("llvm.")) | |||
1608 | return false; | |||
1609 | ||||
1610 | GlobalStatus GS; | |||
1611 | ||||
1612 | if (GlobalStatus::analyzeGlobal(&GV, GS)) | |||
1613 | return false; | |||
1614 | ||||
1615 | bool Changed = false; | |||
1616 | if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) { | |||
1617 | auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global | |||
1618 | : GlobalValue::UnnamedAddr::Local; | |||
1619 | if (NewUnnamedAddr != GV.getUnnamedAddr()) { | |||
1620 | GV.setUnnamedAddr(NewUnnamedAddr); | |||
1621 | NumUnnamed++; | |||
1622 | Changed = true; | |||
1623 | } | |||
1624 | } | |||
1625 | ||||
1626 | // Do more involved optimizations if the global is internal. | |||
1627 | if (!GV.hasLocalLinkage()) | |||
1628 | return Changed; | |||
1629 | ||||
1630 | auto *GVar = dyn_cast<GlobalVariable>(&GV); | |||
1631 | if (!GVar) | |||
1632 | return Changed; | |||
1633 | ||||
1634 | if (GVar->isConstant() || !GVar->hasInitializer()) | |||
1635 | return Changed; | |||
1636 | ||||
1637 | return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed; | |||
1638 | } | |||
1639 | ||||
1640 | /// Walk all of the direct calls of the specified function, changing them to | |||
1641 | /// FastCC. | |||
1642 | static void ChangeCalleesToFastCall(Function *F) { | |||
1643 | for (User *U : F->users()) { | |||
1644 | if (isa<BlockAddress>(U)) | |||
1645 | continue; | |||
1646 | cast<CallBase>(U)->setCallingConv(CallingConv::Fast); | |||
1647 | } | |||
1648 | } | |||
1649 | ||||
1650 | static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, | |||
1651 | Attribute::AttrKind A) { | |||
1652 | unsigned AttrIndex; | |||
1653 | if (Attrs.hasAttrSomewhere(A, &AttrIndex)) | |||
1654 | return Attrs.removeAttribute(C, AttrIndex, A); | |||
1655 | return Attrs; | |||
1656 | } | |||
1657 | ||||
1658 | static void RemoveAttribute(Function *F, Attribute::AttrKind A) { | |||
1659 | F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A)); | |||
1660 | for (User *U : F->users()) { | |||
1661 | if (isa<BlockAddress>(U)) | |||
1662 | continue; | |||
1663 | CallBase *CB = cast<CallBase>(U); | |||
1664 | CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A)); | |||
1665 | } | |||
1666 | } | |||
1667 | ||||
1668 | /// Return true if this is a calling convention that we'd like to change. The | |||
1669 | /// idea here is that we don't want to mess with the convention if the user | |||
1670 | /// explicitly requested something with performance implications like coldcc, | |||
1671 | /// GHC, or anyregcc. | |||
1672 | static bool hasChangeableCC(Function *F) { | |||
1673 | CallingConv::ID CC = F->getCallingConv(); | |||
1674 | ||||
1675 | // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? | |||
1676 | if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) | |||
1677 | return false; | |||
1678 | ||||
1679 | // FIXME: Change CC for the whole chain of musttail calls when possible. | |||
1680 | // | |||
1681 |   // Can't change the CC of a function that either has musttail calls, or is a | |||
1682 |   // musttail callee itself. | |||
1683 | for (User *U : F->users()) { | |||
1684 | if (isa<BlockAddress>(U)) | |||
1685 | continue; | |||
1686 | CallInst* CI = dyn_cast<CallInst>(U); | |||
1687 | if (!CI) | |||
1688 | continue; | |||
1689 | ||||
1690 | if (CI->isMustTailCall()) | |||
1691 | return false; | |||
1692 | } | |||
1693 | ||||
1694 | for (BasicBlock &BB : *F) | |||
1695 | if (BB.getTerminatingMustTailCall()) | |||
1696 | return false; | |||
1697 | ||||
1698 | return true; | |||
1699 | } | |||
1700 | ||||
1701 | /// Return true if the block containing the call site has a BlockFrequency of | |||
1702 | /// less than ColdCCRelFreq% of the entry block. | |||
1703 | static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) { | |||
1704 | const BranchProbability ColdProb(ColdCCRelFreq, 100); | |||
1705 | auto *CallSiteBB = CB.getParent(); | |||
1706 | auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB); | |||
1707 | auto CallerEntryFreq = | |||
1708 | CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock())); | |||
1709 | return CallSiteFreq < CallerEntryFreq * ColdProb; | |||
1710 | } | |||
1711 | ||||
1712 | // This function checks if the input function F is cold at all call sites. It | |||
1713 | // also looks at each call site's containing function, returning false if the | |||
1714 | // caller function contains other non-cold calls. The input vector AllCallsCold | |||
1715 | // contains a list of functions that only have call sites in cold blocks. | |||
1716 | static bool | |||
1717 | isValidCandidateForColdCC(Function &F, | |||
1718 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, | |||
1719 | const std::vector<Function *> &AllCallsCold) { | |||
1720 | ||||
1721 | if (F.user_empty()) | |||
1722 | return false; | |||
1723 | ||||
1724 | for (User *U : F.users()) { | |||
1725 | if (isa<BlockAddress>(U)) | |||
1726 | continue; | |||
1727 | ||||
1728 | CallBase &CB = cast<CallBase>(*U); | |||
1729 | Function *CallerFunc = CB.getParent()->getParent(); | |||
1730 | BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); | |||
1731 | if (!isColdCallSite(CB, CallerBFI)) | |||
1732 | return false; | |||
1733 | if (!llvm::is_contained(AllCallsCold, CallerFunc)) | |||
1734 | return false; | |||
1735 | } | |||
1736 | return true; | |||
1737 | } | |||
1738 | ||||
1739 | static void changeCallSitesToColdCC(Function *F) { | |||
1740 | for (User *U : F->users()) { | |||
1741 | if (isa<BlockAddress>(U)) | |||
1742 | continue; | |||
1743 | cast<CallBase>(U)->setCallingConv(CallingConv::Cold); | |||
1744 | } | |||
1745 | } | |||
1746 | ||||
1747 | // This function iterates over all the call instructions in the input Function | |||
1748 | // and checks that all call sites are in cold blocks and are allowed to use the | |||
1749 | // coldcc calling convention. | |||
1750 | static bool | |||
1751 | hasOnlyColdCalls(Function &F, | |||
1752 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI) { | |||
1753 | for (BasicBlock &BB : F) { | |||
1754 | for (Instruction &I : BB) { | |||
1755 | if (CallInst *CI = dyn_cast<CallInst>(&I)) { | |||
1756 |         // Skip over inline asm instructions since they aren't function calls. | |||
1757 | if (CI->isInlineAsm()) | |||
1758 | continue; | |||
1759 | Function *CalledFn = CI->getCalledFunction(); | |||
1760 | if (!CalledFn) | |||
1761 | return false; | |||
1762 | if (!CalledFn->hasLocalLinkage()) | |||
1763 | return false; | |||
1764 |         // Skip over intrinsics since they won't remain as function calls. | |||
1765 | if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic) | |||
1766 | continue; | |||
1767 | // Check if it's valid to use coldcc calling convention. | |||
1768 | if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() || | |||
1769 | CalledFn->hasAddressTaken()) | |||
1770 | return false; | |||
1771 | BlockFrequencyInfo &CallerBFI = GetBFI(F); | |||
1772 | if (!isColdCallSite(*CI, CallerBFI)) | |||
1773 | return false; | |||
1774 | } | |||
1775 | } | |||
1776 | } | |||
1777 | return true; | |||
1778 | } | |||
1779 | ||||
1780 | static bool hasMustTailCallers(Function *F) { | |||
1781 | for (User *U : F->users()) { | |||
1782 | CallBase *CB = dyn_cast<CallBase>(U); | |||
1783 | if (!CB) { | |||
1784 | assert(isa<BlockAddress>(U) &&((void)0) | |||
1785 | "Expected either CallBase or BlockAddress")((void)0); | |||
1786 | continue; | |||
1787 | } | |||
1788 | if (CB->isMustTailCall()) | |||
1789 | return true; | |||
1790 | } | |||
1791 | return false; | |||
1792 | } | |||
1793 | ||||
1794 | static bool hasInvokeCallers(Function *F) { | |||
1795 | for (User *U : F->users()) | |||
1796 | if (isa<InvokeInst>(U)) | |||
1797 | return true; | |||
1798 | return false; | |||
1799 | } | |||
1800 | ||||
1801 | static void RemovePreallocated(Function *F) { | |||
1802 | RemoveAttribute(F, Attribute::Preallocated); | |||
1803 | ||||
1804 | auto *M = F->getParent(); | |||
1805 | ||||
1806 | IRBuilder<> Builder(M->getContext()); | |||
1807 | ||||
1808 | // Cannot modify users() while iterating over it, so make a copy. | |||
1809 | SmallVector<User *, 4> PreallocatedCalls(F->users()); | |||
1810 | for (User *U : PreallocatedCalls) { | |||
1811 | CallBase *CB = dyn_cast<CallBase>(U); | |||
1812 | if (!CB) | |||
1813 | continue; | |||
1814 | ||||
1815 | assert(((void)0) | |||
1816 | !CB->isMustTailCall() &&((void)0) | |||
1817 |         "Shouldn't call RemovePreallocated() on a musttail preallocated call")((void)0); | |||
1818 | // Create copy of call without "preallocated" operand bundle. | |||
1819 | SmallVector<OperandBundleDef, 1> OpBundles; | |||
1820 | CB->getOperandBundlesAsDefs(OpBundles); | |||
1821 | CallBase *PreallocatedSetup = nullptr; | |||
1822 | for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) { | |||
1823 | if (It->getTag() == "preallocated") { | |||
1824 | PreallocatedSetup = cast<CallBase>(*It->input_begin()); | |||
1825 | OpBundles.erase(It); | |||
1826 | break; | |||
1827 | } | |||
1828 | } | |||
1829 | assert(PreallocatedSetup && "Did not find preallocated bundle")((void)0); | |||
1830 | uint64_t ArgCount = | |||
1831 | cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue(); | |||
1832 | ||||
1833 | assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&((void)0) | |||
1834 | "Unknown indirect call type")((void)0); | |||
1835 | CallBase *NewCB = CallBase::Create(CB, OpBundles, CB); | |||
1836 | CB->replaceAllUsesWith(NewCB); | |||
1837 | NewCB->takeName(CB); | |||
1838 | CB->eraseFromParent(); | |||
1839 | ||||
1840 | Builder.SetInsertPoint(PreallocatedSetup); | |||
1841 | auto *StackSave = | |||
1842 | Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave)); | |||
1843 | ||||
1844 | Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction()); | |||
1845 | Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore), | |||
1846 | StackSave); | |||
1847 | ||||
1848 | // Replace @llvm.call.preallocated.arg() with alloca. | |||
1849 | // Cannot modify users() while iterating over it, so make a copy. | |||
1850 | // @llvm.call.preallocated.arg() can be called with the same index multiple | |||
1851 | // times. So for each @llvm.call.preallocated.arg(), we see if we have | |||
1852 | // already created a Value* for the index, and if not, create an alloca and | |||
1853 | // bitcast right after the @llvm.call.preallocated.setup() so that it | |||
1854 | // dominates all uses. | |||
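    // Rough sketch (hypothetical IR): for a setup/arg pair such as
    //   %t  = call token @llvm.call.preallocated.setup(i32 1)
    //   %a0 = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
    // the arg call is replaced by an alloca of the preallocated type, created
    // right after the setup call and bitcast to i8*; any later arg call with
    // the same index reuses that bitcast.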
1855 | SmallVector<Value *, 2> ArgAllocas(ArgCount); | |||
1856 | SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users()); | |||
1857 | for (auto *User : PreallocatedArgs) { | |||
1858 | auto *UseCall = cast<CallBase>(User); | |||
1859 | assert(UseCall->getCalledFunction()->getIntrinsicID() ==((void)0) | |||
1860 | Intrinsic::call_preallocated_arg &&((void)0) | |||
1861 | "preallocated token use was not a llvm.call.preallocated.arg")((void)0); | |||
1862 | uint64_t AllocArgIndex = | |||
1863 | cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue(); | |||
1864 | Value *AllocaReplacement = ArgAllocas[AllocArgIndex]; | |||
1865 | if (!AllocaReplacement) { | |||
1866 | auto AddressSpace = UseCall->getType()->getPointerAddressSpace(); | |||
1867 | auto *ArgType = UseCall | |||
1868 | ->getAttribute(AttributeList::FunctionIndex, | |||
1869 | Attribute::Preallocated) | |||
1870 | .getValueAsType(); | |||
1871 | auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction(); | |||
1872 | Builder.SetInsertPoint(InsertBefore); | |||
1873 | auto *Alloca = | |||
1874 | Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg"); | |||
1875 | auto *BitCast = Builder.CreateBitCast( | |||
1876 | Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName()); | |||
1877 | ArgAllocas[AllocArgIndex] = BitCast; | |||
1878 | AllocaReplacement = BitCast; | |||
1879 | } | |||
1880 | ||||
1881 | UseCall->replaceAllUsesWith(AllocaReplacement); | |||
1882 | UseCall->eraseFromParent(); | |||
1883 | } | |||
1884 | // Remove @llvm.call.preallocated.setup(). | |||
1885 | cast<Instruction>(PreallocatedSetup)->eraseFromParent(); | |||
1886 | } | |||
1887 | } | |||
1888 | ||||
1889 | static bool | |||
1890 | OptimizeFunctions(Module &M, | |||
1891 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
1892 | function_ref<TargetTransformInfo &(Function &)> GetTTI, | |||
1893 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, | |||
1894 | function_ref<DominatorTree &(Function &)> LookupDomTree, | |||
1895 | SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { | |||
1896 | ||||
1897 | bool Changed = false; | |||
1898 | ||||
1899 | std::vector<Function *> AllCallsCold; | |||
1900 | for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) { | |||
1901 | Function *F = &*FI++; | |||
1902 | if (hasOnlyColdCalls(*F, GetBFI)) | |||
1903 | AllCallsCold.push_back(F); | |||
1904 | } | |||
1905 | ||||
1906 | // Optimize functions. | |||
1907 | for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { | |||
1908 | Function *F = &*FI++; | |||
1909 | ||||
1910 | // Don't perform global opt pass on naked functions; we don't want fast | |||
1911 | // calling conventions for naked functions. | |||
1912 | if (F->hasFnAttribute(Attribute::Naked)) | |||
1913 | continue; | |||
1914 | ||||
1915 | // Functions without names cannot be referenced outside this module. | |||
1916 | if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) | |||
1917 | F->setLinkage(GlobalValue::InternalLinkage); | |||
1918 | ||||
1919 | if (deleteIfDead(*F, NotDiscardableComdats)) { | |||
1920 | Changed = true; | |||
1921 | continue; | |||
1922 | } | |||
1923 | ||||
1924 | // LLVM's definition of dominance allows instructions that are cyclic | |||
1925 | // in unreachable blocks, e.g.: | |||
1926 | // %pat = select i1 %condition, @global, i16* %pat | |||
1927 | // because any instruction dominates an instruction in a block that's | |||
1928 | // not reachable from entry. | |||
1929 | // So, remove unreachable blocks from the function, because a) there's | |||
1930 | // no point in analyzing them and b) GlobalOpt should otherwise grow | |||
1931 | // some more complicated logic to break these cycles. | |||
1932 |     // Removing unreachable blocks might invalidate the dominator tree, so we | |||
1933 |     // recalculate it. | |||
1934 | if (!F->isDeclaration()) { | |||
1935 | if (removeUnreachableBlocks(*F)) { | |||
1936 | auto &DT = LookupDomTree(*F); | |||
1937 | DT.recalculate(*F); | |||
1938 | Changed = true; | |||
1939 | } | |||
1940 | } | |||
1941 | ||||
1942 | Changed |= processGlobal(*F, GetTLI, LookupDomTree); | |||
1943 | ||||
1944 | if (!F->hasLocalLinkage()) | |||
1945 | continue; | |||
1946 | ||||
1947 | // If we have an inalloca parameter that we can safely remove the | |||
1948 | // inalloca attribute from, do so. This unlocks optimizations that | |||
1949 | // wouldn't be safe in the presence of inalloca. | |||
1950 | // FIXME: We should also hoist alloca affected by this to the entry | |||
1951 | // block if possible. | |||
1952 | if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) && | |||
1953 | !F->hasAddressTaken() && !hasMustTailCallers(F)) { | |||
1954 | RemoveAttribute(F, Attribute::InAlloca); | |||
1955 | Changed = true; | |||
1956 | } | |||
1957 | ||||
1958 | // FIXME: handle invokes | |||
1959 | // FIXME: handle musttail | |||
1960 | if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { | |||
1961 | if (!F->hasAddressTaken() && !hasMustTailCallers(F) && | |||
1962 | !hasInvokeCallers(F)) { | |||
1963 | RemovePreallocated(F); | |||
1964 | Changed = true; | |||
1965 | } | |||
1966 | continue; | |||
1967 | } | |||
1968 | ||||
1969 | if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { | |||
1970 | NumInternalFunc++; | |||
1971 | TargetTransformInfo &TTI = GetTTI(*F); | |||
1972 | // Change the calling convention to coldcc if either stress testing is | |||
1973 | // enabled or the target would like to use coldcc on functions which are | |||
1974 |       // cold at all call sites and the callers contain no other non-coldcc | |||
1975 | // calls. | |||
1976 | if (EnableColdCCStressTest || | |||
1977 | (TTI.useColdCCForColdCall(*F) && | |||
1978 | isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) { | |||
1979 | F->setCallingConv(CallingConv::Cold); | |||
1980 | changeCallSitesToColdCC(F); | |||
1981 | Changed = true; | |||
1982 | NumColdCC++; | |||
1983 | } | |||
1984 | } | |||
1985 | ||||
1986 | if (hasChangeableCC(F) && !F->isVarArg() && | |||
1987 | !F->hasAddressTaken()) { | |||
1988 | // If this function has a calling convention worth changing, is not a | |||
1989 | // varargs function, and is only called directly, promote it to use the | |||
1990 | // Fast calling convention. | |||
1991 | F->setCallingConv(CallingConv::Fast); | |||
1992 | ChangeCalleesToFastCall(F); | |||
1993 | ++NumFastCallFns; | |||
1994 | Changed = true; | |||
1995 | } | |||
1996 | ||||
1997 | if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && | |||
1998 | !F->hasAddressTaken()) { | |||
1999 | // The function is not used by a trampoline intrinsic, so it is safe | |||
2000 | // to remove the 'nest' attribute. | |||
2001 | RemoveAttribute(F, Attribute::Nest); | |||
2002 | ++NumNestRemoved; | |||
2003 | Changed = true; | |||
2004 | } | |||
2005 | } | |||
2006 | return Changed; | |||
2007 | } | |||
2008 | ||||
2009 | static bool | |||
2010 | OptimizeGlobalVars(Module &M, | |||
2011 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
2012 | function_ref<DominatorTree &(Function &)> LookupDomTree, | |||
2013 | SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { | |||
2014 | bool Changed = false; | |||
2015 | ||||
2016 | for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); | |||
2017 | GVI != E; ) { | |||
2018 | GlobalVariable *GV = &*GVI++; | |||
2019 | // Global variables without names cannot be referenced outside this module. | |||
2020 | if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) | |||
2021 | GV->setLinkage(GlobalValue::InternalLinkage); | |||
2022 | // Simplify the initializer. | |||
2023 | if (GV->hasInitializer()) | |||
2024 | if (auto *C = dyn_cast<Constant>(GV->getInitializer())) { | |||
2025 | auto &DL = M.getDataLayout(); | |||
2026 | // TLI is not used in the case of a Constant, so use default nullptr | |||
2027 | // for that optional parameter, since we don't have a Function to | |||
2028 | // provide GetTLI anyway. | |||
2029 | Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); | |||
2030 | if (New != C) | |||
2031 | GV->setInitializer(New); | |||
2032 | } | |||
2033 | ||||
2034 | if (deleteIfDead(*GV, NotDiscardableComdats)) { | |||
2035 | Changed = true; | |||
2036 | continue; | |||
2037 | } | |||
2038 | ||||
2039 | Changed |= processGlobal(*GV, GetTLI, LookupDomTree); | |||
2040 | } | |||
2041 | return Changed; | |||
2042 | } | |||
2043 | ||||
2044 | /// Evaluate a piece of a constantexpr store into a global initializer. This | |||
2045 | /// returns 'Init' modified to reflect 'Val' stored into it. At this point, the | |||
2046 | /// GEP operands of Addr [0, OpNo) have been stepped into. | |||
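// Worked example (hypothetical constants): committing Val = i32 7 through
//   getelementptr ({ i32, [2 x i32] }, { i32, [2 x i32] }* @g, i32 0, i32 1, i32 0)
// against Init = { i32 1, [2 x i32] [i32 2, i32 3] } steps through operands
// 2 and 3 and rebuilds the aggregate as { i32 1, [2 x i32] [i32 7, i32 3] }.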
2047 | static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, | |||
2048 | ConstantExpr *Addr, unsigned OpNo) { | |||
2049 | // Base case of the recursion. | |||
2050 | if (OpNo == Addr->getNumOperands()) { | |||
2051 | assert(Val->getType() == Init->getType() && "Type mismatch!")((void)0); | |||
2052 | return Val; | |||
2053 | } | |||
2054 | ||||
2055 | SmallVector<Constant*, 32> Elts; | |||
2056 | if (StructType *STy = dyn_cast<StructType>(Init->getType())) { | |||
2057 | // Break up the constant into its elements. | |||
2058 | for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) | |||
2059 | Elts.push_back(Init->getAggregateElement(i)); | |||
2060 | ||||
2061 | // Replace the element that we are supposed to. | |||
2062 | ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); | |||
2063 | unsigned Idx = CU->getZExtValue(); | |||
2064 | assert(Idx < STy->getNumElements() && "Struct index out of range!")((void)0); | |||
2065 | Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); | |||
2066 | ||||
2067 | // Return the modified struct. | |||
2068 | return ConstantStruct::get(STy, Elts); | |||
2069 | } | |||
2070 | ||||
2071 | ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); | |||
2072 | uint64_t NumElts; | |||
2073 | if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) | |||
2074 | NumElts = ATy->getNumElements(); | |||
2075 | else | |||
2076 | NumElts = cast<FixedVectorType>(Init->getType())->getNumElements(); | |||
2077 | ||||
2078 | // Break up the array into elements. | |||
2079 | for (uint64_t i = 0, e = NumElts; i != e; ++i) | |||
2080 | Elts.push_back(Init->getAggregateElement(i)); | |||
2081 | ||||
2082 | assert(CI->getZExtValue() < NumElts)((void)0); | |||
2083 | Elts[CI->getZExtValue()] = | |||
2084 | EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); | |||
2085 | ||||
2086 | if (Init->getType()->isArrayTy()) | |||
2087 | return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts); | |||
2088 | return ConstantVector::get(Elts); | |||
2089 | } | |||
2090 | ||||
2091 | /// We have decided that Addr (which satisfies the predicate | |||
2092 | /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. | |||
2093 | static void CommitValueTo(Constant *Val, Constant *Addr) { | |||
2094 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { | |||
2095 | assert(GV->hasInitializer())((void)0); | |||
2096 | GV->setInitializer(Val); | |||
2097 | return; | |||
2098 | } | |||
2099 | ||||
2100 | ConstantExpr *CE = cast<ConstantExpr>(Addr); | |||
2101 | GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); | |||
2102 | GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); | |||
2103 | } | |||
2104 | ||||
2105 | /// Given a map of address -> value, where addresses are expected to be some form | |||
2106 | /// of either a global or a constant GEP, set the initializer for the address to | |||
2107 | /// be the value. This performs mostly the same function as CommitValueTo() | |||
2108 | /// and EvaluateStoreInto() but is optimized to be more efficient for the common | |||
2109 | /// case where the set of addresses are GEPs sharing the same underlying global, | |||
2110 | /// processing the GEPs in batches rather than individually. | |||
2111 | /// | |||
2112 | /// To give an example, consider the following C++ code adapted from the clang | |||
2113 | /// regression tests: | |||
2114 | /// struct S { | |||
2115 | /// int n = 10; | |||
2116 | /// int m = 2 * n; | |||
2117 | /// S(int a) : n(a) {} | |||
2118 | /// }; | |||
2119 | /// | |||
2120 | /// template<typename T> | |||
2121 | /// struct U { | |||
2122 | /// T *r = &q; | |||
2123 | /// T q = 42; | |||
2124 | /// U *p = this; | |||
2125 | /// }; | |||
2126 | /// | |||
2127 | /// U<S> e; | |||
2128 | /// | |||
2129 | /// The global static constructor for 'e' will need to initialize 'r' and 'p' of | |||
2130 | /// the outer struct, while also initializing the inner 'q' struct's 'n' and 'm' | |||
2131 | /// members. This batch algorithm will simply use the general CommitValueTo() | |||
2132 | /// method to handle the complex nested S struct initialization of 'q', before | |||
2133 | /// processing the outermost members in a single batch. Using CommitValueTo() to | |||
2134 | /// handle members in the outer struct is inefficient when the struct/array is | |||
2135 | /// very large, as we end up creating and destroying constant arrays for each | |||
2136 | /// initialization. | |||
2137 | /// For the above case, we expect the following IR to be generated: | |||
2138 | /// | |||
2139 | /// %struct.U = type { %struct.S*, %struct.S, %struct.U* } | |||
2140 | /// %struct.S = type { i32, i32 } | |||
2141 | /// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e, | |||
2142 | /// i64 0, i32 1), | |||
2143 | /// %struct.S { i32 42, i32 84 }, %struct.U* @e } | |||
2144 | /// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex | |||
2145 | /// constant expression, while the other two elements of @e are "simple". | |||
2146 | static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) { | |||
2147 | SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs; | |||
2148 | SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs; | |||
2149 | SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs; | |||
2150 | SimpleCEs.reserve(Mem.size()); | |||
2151 | ||||
2152 | for (const auto &I : Mem) { | |||
2153 | if (auto *GV = dyn_cast<GlobalVariable>(I.first)) { | |||
2154 | GVs.push_back(std::make_pair(GV, I.second)); | |||
2155 | } else { | |||
2156 | ConstantExpr *GEP = cast<ConstantExpr>(I.first); | |||
2157 | // We don't handle the deeply recursive case using the batch method. | |||
2158 | if (GEP->getNumOperands() > 3) | |||
2159 | ComplexCEs.push_back(std::make_pair(GEP, I.second)); | |||
2160 | else | |||
2161 | SimpleCEs.push_back(std::make_pair(GEP, I.second)); | |||
2162 | } | |||
2163 | } | |||
2164 | ||||
2165 | // The algorithm below doesn't handle cases like nested structs, so use the | |||
2166 | // slower fully general method if we have to. | |||
2167 | for (auto ComplexCE : ComplexCEs) | |||
2168 | CommitValueTo(ComplexCE.second, ComplexCE.first); | |||
2169 | ||||
2170 | for (auto GVPair : GVs) { | |||
2171 | assert(GVPair.first->hasInitializer())((void)0); | |||
2172 | GVPair.first->setInitializer(GVPair.second); | |||
2173 | } | |||
2174 | ||||
2175 | if (SimpleCEs.empty()) | |||
2176 | return; | |||
2177 | ||||
2178 | // We cache a single global's initializer elements in the case where the | |||
2179 | // subsequent address/val pair uses the same one. This avoids throwing away and | |||
2180 | // rebuilding the constant struct/vector/array just because one element is | |||
2181 | // modified at a time. | |||
2182 | SmallVector<Constant *, 32> Elts; | |||
2183 | Elts.reserve(SimpleCEs.size()); | |||
2184 | GlobalVariable *CurrentGV = nullptr; | |||
2185 | ||||
2186 | auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) { | |||
2187 | Constant *Init = GV->getInitializer(); | |||
2188 | Type *Ty = Init->getType(); | |||
2189 | if (Update) { | |||
2190 | if (CurrentGV) { | |||
2191 | assert(CurrentGV && "Expected a GV to commit to!")((void)0); | |||
2192 | Type *CurrentInitTy = CurrentGV->getInitializer()->getType(); | |||
2193 | // We have a valid cache that needs to be committed. | |||
2194 | if (StructType *STy = dyn_cast<StructType>(CurrentInitTy)) | |||
2195 | CurrentGV->setInitializer(ConstantStruct::get(STy, Elts)); | |||
2196 | else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy)) | |||
2197 | CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts)); | |||
2198 | else | |||
2199 | CurrentGV->setInitializer(ConstantVector::get(Elts)); | |||
2200 | } | |||
2201 | if (CurrentGV == GV) | |||
2202 | return; | |||
2203 | // Need to clear and set up cache for new initializer. | |||
2204 | CurrentGV = GV; | |||
2205 | Elts.clear(); | |||
2206 | unsigned NumElts; | |||
2207 | if (auto *STy = dyn_cast<StructType>(Ty)) | |||
2208 | NumElts = STy->getNumElements(); | |||
2209 | else if (auto *ATy = dyn_cast<ArrayType>(Ty)) | |||
2210 | NumElts = ATy->getNumElements(); | |||
2211 | else | |||
2212 | NumElts = cast<FixedVectorType>(Ty)->getNumElements(); | |||
2213 | for (unsigned i = 0, e = NumElts; i != e; ++i) | |||
2214 | Elts.push_back(Init->getAggregateElement(i)); | |||
2215 | } | |||
2216 | }; | |||
2217 | ||||
2218 | for (auto CEPair : SimpleCEs) { | |||
2219 | ConstantExpr *GEP = CEPair.first; | |||
2220 | Constant *Val = CEPair.second; | |||
2221 | ||||
2222 | GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0)); | |||
2223 | commitAndSetupCache(GV, GV != CurrentGV); | |||
2224 | ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2)); | |||
2225 | Elts[CI->getZExtValue()] = Val; | |||
2226 | } | |||
2227 |   // The last initializer in the list needs to be committed; the others are | |||
2228 |   // committed as soon as the next initializer starts being processed. | |||
2229 | commitAndSetupCache(CurrentGV, true); | |||
2230 | } | |||
2231 | ||||
2232 | /// Evaluate static constructors in the function, if we can. Return true if we | |||
2233 | /// can, false otherwise. | |||
2234 | static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, | |||
2235 | TargetLibraryInfo *TLI) { | |||
2236 | // Call the function. | |||
2237 | Evaluator Eval(DL, TLI); | |||
2238 | Constant *RetValDummy; | |||
2239 | bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, | |||
2240 | SmallVector<Constant*, 0>()); | |||
2241 | ||||
2242 | if (EvalSuccess) { | |||
2243 | ++NumCtorsEvaluated; | |||
2244 | ||||
2245 | // We succeeded at evaluation: commit the result. | |||
2246 | LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"do { } while (false) | |||
2247 | << F->getName() << "' to "do { } while (false) | |||
2248 | << Eval.getMutatedMemory().size() << " stores.\n")do { } while (false); | |||
2249 | BatchCommitValueTo(Eval.getMutatedMemory()); | |||
2250 | for (GlobalVariable *GV : Eval.getInvariants()) | |||
2251 | GV->setConstant(true); | |||
2252 | } | |||
2253 | ||||
2254 | return EvalSuccess; | |||
2255 | } | |||
2256 | ||||
2257 | static int compareNames(Constant *const *A, Constant *const *B) { | |||
2258 | Value *AStripped = (*A)->stripPointerCasts(); | |||
2259 | Value *BStripped = (*B)->stripPointerCasts(); | |||
2260 | return AStripped->getName().compare(BStripped->getName()); | |||
2261 | } | |||
2262 | ||||
2263 | static void setUsedInitializer(GlobalVariable &V, | |||
2264 | const SmallPtrSetImpl<GlobalValue *> &Init) { | |||
2265 | if (Init.empty()) { | |||
2266 | V.eraseFromParent(); | |||
2267 | return; | |||
2268 | } | |||
2269 | ||||
2270 | // Type of pointer to the array of pointers. | |||
2271 | PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); | |||
2272 | ||||
2273 | SmallVector<Constant *, 8> UsedArray; | |||
2274 | for (GlobalValue *GV : Init) { | |||
2275 | Constant *Cast | |||
2276 | = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); | |||
2277 | UsedArray.push_back(Cast); | |||
2278 | } | |||
2279 | // Sort to get deterministic order. | |||
2280 | array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); | |||
2281 | ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size()); | |||
2282 | ||||
2283 | Module *M = V.getParent(); | |||
2284 | V.removeFromParent(); | |||
2285 | GlobalVariable *NV = | |||
2286 | new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, | |||
2287 | ConstantArray::get(ATy, UsedArray), ""); | |||
2288 | NV->takeName(&V); | |||
2289 | NV->setSection("llvm.metadata"); | |||
2290 | delete &V; | |||
2291 | } | |||
2292 | ||||
2293 | namespace { | |||
2294 | ||||
2295 | /// An easy to access representation of llvm.used and llvm.compiler.used. | |||
2296 | class LLVMUsed { | |||
2297 | SmallPtrSet<GlobalValue *, 4> Used; | |||
2298 | SmallPtrSet<GlobalValue *, 4> CompilerUsed; | |||
2299 | GlobalVariable *UsedV; | |||
2300 | GlobalVariable *CompilerUsedV; | |||
2301 | ||||
2302 | public: | |||
2303 | LLVMUsed(Module &M) { | |||
2304 | SmallVector<GlobalValue *, 4> Vec; | |||
2305 | UsedV = collectUsedGlobalVariables(M, Vec, false); | |||
2306 | Used = {Vec.begin(), Vec.end()}; | |||
2307 | Vec.clear(); | |||
2308 | CompilerUsedV = collectUsedGlobalVariables(M, Vec, true); | |||
2309 | CompilerUsed = {Vec.begin(), Vec.end()}; | |||
2310 | } | |||
2311 | ||||
2312 | using iterator = SmallPtrSet<GlobalValue *, 4>::iterator; | |||
2313 | using used_iterator_range = iterator_range<iterator>; | |||
2314 | ||||
2315 | iterator usedBegin() { return Used.begin(); } | |||
2316 | iterator usedEnd() { return Used.end(); } | |||
2317 | ||||
2318 | used_iterator_range used() { | |||
2319 | return used_iterator_range(usedBegin(), usedEnd()); | |||
2320 | } | |||
2321 | ||||
2322 | iterator compilerUsedBegin() { return CompilerUsed.begin(); } | |||
2323 | iterator compilerUsedEnd() { return CompilerUsed.end(); } | |||
2324 | ||||
2325 | used_iterator_range compilerUsed() { | |||
2326 | return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); | |||
2327 | } | |||
2328 | ||||
2329 | bool usedCount(GlobalValue *GV) const { return Used.count(GV); } | |||
2330 | ||||
2331 | bool compilerUsedCount(GlobalValue *GV) const { | |||
2332 | return CompilerUsed.count(GV); | |||
2333 | } | |||
2334 | ||||
2335 | bool usedErase(GlobalValue *GV) { return Used.erase(GV); } | |||
2336 | bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } | |||
2337 | bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } | |||
2338 | ||||
2339 | bool compilerUsedInsert(GlobalValue *GV) { | |||
2340 | return CompilerUsed.insert(GV).second; | |||
2341 | } | |||
2342 | ||||
2343 | void syncVariablesAndSets() { | |||
2344 | if (UsedV) | |||
2345 | setUsedInitializer(*UsedV, Used); | |||
2346 | if (CompilerUsedV) | |||
2347 | setUsedInitializer(*CompilerUsedV, CompilerUsed); | |||
2348 | } | |||
2349 | }; | |||
2350 | ||||
2351 | } // end anonymous namespace | |||
2352 | ||||
2353 | static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { | |||
2354 | if (GA.use_empty()) // No use at all. | |||
2355 | return false; | |||
2356 | ||||
2357 | assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&((void)0) | |||
2358 | "We should have removed the duplicated "((void)0) | |||
2359 | "element from llvm.compiler.used")((void)0); | |||
2360 | if (!GA.hasOneUse()) | |||
2361 |     // Strictly more than one use. So at least one use is in neither llvm.used | |||
2362 |     // nor llvm.compiler.used. | |||
2363 | return true; | |||
2364 | ||||
2365 | // Exactly one use. Check if it is in llvm.used or llvm.compiler.used. | |||
2366 | return !U.usedCount(&GA) && !U.compilerUsedCount(&GA); | |||
2367 | } | |||
2368 | ||||
2369 | static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, | |||
2370 | const LLVMUsed &U) { | |||
2371 | unsigned N = 2; | |||
2372 | assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&((void)0) | |||
2373 | "We should have removed the duplicated "((void)0) | |||
2374 | "element from llvm.compiler.used")((void)0); | |||
2375 | if (U.usedCount(&V) || U.compilerUsedCount(&V)) | |||
2376 | ++N; | |||
2377 | return V.hasNUsesOrMore(N); | |||
2378 | } | |||
2379 | ||||
2380 | static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) { | |||
2381 | if (!GA.hasLocalLinkage()) | |||
2382 | return true; | |||
2383 | ||||
2384 | return U.usedCount(&GA) || U.compilerUsedCount(&GA); | |||
2385 | } | |||
2386 | ||||
2387 | static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, | |||
2388 | bool &RenameTarget) { | |||
2389 | RenameTarget = false; | |||
2390 | bool Ret = false; | |||
2391 | if (hasUseOtherThanLLVMUsed(GA, U)) | |||
2392 | Ret = true; | |||
2393 | ||||
2394 | // If the alias is externally visible, we may still be able to simplify it. | |||
2395 | if (!mayHaveOtherReferences(GA, U)) | |||
2396 | return Ret; | |||
2397 | ||||
2398 | // If the aliasee has internal linkage, give it the name and linkage | |||
2399 | // of the alias, and delete the alias. This turns: | |||
2400 | // define internal ... @f(...) | |||
2401 | // @a = alias ... @f | |||
2402 | // into: | |||
2403 | // define ... @a(...) | |||
2404 | Constant *Aliasee = GA.getAliasee(); | |||
2405 | GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); | |||
2406 | if (!Target->hasLocalLinkage()) | |||
2407 | return Ret; | |||
2408 | ||||
2409 | // Do not perform the transform if multiple aliases potentially target the | |||
2410 | // aliasee. This check also ensures that it is safe to replace the section | |||
2411 | // and other attributes of the aliasee with those of the alias. | |||
2412 | if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U)) | |||
2413 | return Ret; | |||
2414 | ||||
2415 | RenameTarget = true; | |||
2416 | return true; | |||
2417 | } | |||
2418 | ||||
2419 | static bool | |||
2420 | OptimizeGlobalAliases(Module &M, | |||
2421 | SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { | |||
2422 | bool Changed = false; | |||
2423 | LLVMUsed Used(M); | |||
2424 | ||||
2425 | for (GlobalValue *GV : Used.used()) | |||
2426 | Used.compilerUsedErase(GV); | |||
2427 | ||||
2428 | for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); | |||
2429 | I != E;) { | |||
2430 | GlobalAlias *J = &*I++; | |||
2431 | ||||
2432 | // Aliases without names cannot be referenced outside this module. | |||
2433 | if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) | |||
2434 | J->setLinkage(GlobalValue::InternalLinkage); | |||
2435 | ||||
2436 | if (deleteIfDead(*J, NotDiscardableComdats)) { | |||
2437 | Changed = true; | |||
2438 | continue; | |||
2439 | } | |||
2440 | ||||
2441 | // If the alias can change at link time, nothing can be done - bail out. | |||
2442 | if (J->isInterposable()) | |||
2443 | continue; | |||
2444 | ||||
2445 | Constant *Aliasee = J->getAliasee(); | |||
2446 | GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); | |||
2447 | // We can't trivially replace the alias with the aliasee if the aliasee is | |||
2448 | // non-trivial in some way. We also can't replace the alias with the aliasee | |||
2449 | // if the aliasee is interposable because aliases point to the local | |||
2450 | // definition. | |||
2451 | // TODO: Try to handle non-zero GEPs of local aliasees. | |||
2452 | if (!Target || Target->isInterposable()) | |||
2453 | continue; | |||
2454 | Target->removeDeadConstantUsers(); | |||
2455 | ||||
2456 | // Make all users of the alias use the aliasee instead. | |||
2457 | bool RenameTarget; | |||
2458 | if (!hasUsesToReplace(*J, Used, RenameTarget)) | |||
2459 | continue; | |||
2460 | ||||
2461 | J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); | |||
2462 | ++NumAliasesResolved; | |||
2463 | Changed = true; | |||
2464 | ||||
2465 | if (RenameTarget) { | |||
2466 | // Give the aliasee the name, linkage and other attributes of the alias. | |||
2467 | Target->takeName(&*J); | |||
2468 | Target->setLinkage(J->getLinkage()); | |||
2469 | Target->setDSOLocal(J->isDSOLocal()); | |||
2470 | Target->setVisibility(J->getVisibility()); | |||
2471 | Target->setDLLStorageClass(J->getDLLStorageClass()); | |||
2472 | ||||
2473 | if (Used.usedErase(&*J)) | |||
2474 | Used.usedInsert(Target); | |||
2475 | ||||
2476 | if (Used.compilerUsedErase(&*J)) | |||
2477 | Used.compilerUsedInsert(Target); | |||
2478 | } else if (mayHaveOtherReferences(*J, Used)) | |||
2479 | continue; | |||
2480 | ||||
2481 | // Delete the alias. | |||
2482 | M.getAliasList().erase(J); | |||
2483 | ++NumAliasesRemoved; | |||
2484 | Changed = true; | |||
2485 | } | |||
2486 | ||||
2487 | Used.syncVariablesAndSets(); | |||
2488 | ||||
2489 | return Changed; | |||
2490 | } | |||
2491 | ||||
2492 | static Function * | |||
2493 | FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { | |||
2494 |   // Hack to get a default TLI before we have an actual Function. | |||
2495 | auto FuncIter = M.begin(); | |||
2496 | if (FuncIter == M.end()) | |||
2497 | return nullptr; | |||
2498 | auto *TLI = &GetTLI(*FuncIter); | |||
2499 | ||||
2500 | LibFunc F = LibFunc_cxa_atexit; | |||
2501 | if (!TLI->has(F)) | |||
2502 | return nullptr; | |||
2503 | ||||
2504 | Function *Fn = M.getFunction(TLI->getName(F)); | |||
2505 | if (!Fn) | |||
2506 | return nullptr; | |||
2507 | ||||
2508 | // Now get the actual TLI for Fn. | |||
2509 | TLI = &GetTLI(*Fn); | |||
2510 | ||||
2511 | // Make sure that the function has the correct prototype. | |||
2512 | if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit) | |||
2513 | return nullptr; | |||
2514 | ||||
2515 | return Fn; | |||
2516 | } | |||
2517 | ||||
2518 | /// Returns whether the given function is an empty C++ destructor and can | |||
2519 | /// therefore be eliminated. | |||
2520 | /// Note that we assume that other optimization passes have already simplified | |||
2521 | /// the code so we simply check for 'ret'. | |||
2522 | static bool cxxDtorIsEmpty(const Function &Fn) { | |||
2523 | // FIXME: We could eliminate C++ destructors if they're readonly/readnone and | |||
2524 | // nounwind, but that doesn't seem worth doing. | |||
2525 | if (Fn.isDeclaration()) | |||
2526 | return false; | |||
2527 | ||||
2528 | for (auto &I : Fn.getEntryBlock()) { | |||
2529 | if (isa<DbgInfoIntrinsic>(I)) | |||
2530 | continue; | |||
2531 | if (isa<ReturnInst>(I)) | |||
2532 | return true; | |||
2533 | break; | |||
2534 | } | |||
2535 | return false; | |||
2536 | } | |||
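// A minimal illustration (not taken from this file) of a destructor the check
// above accepts: assuming earlier passes have simplified the body down to a
// lone 'ret' (plus possibly debug intrinsics), something like
//
//   struct Logger { ~Logger() {} };
//   static Logger TheLogger;   // its destruction is registered at startup
//
// yields a destructor for which cxxDtorIsEmpty() returns true.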
2537 | ||||
2538 | static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { | |||
2539 | /// Itanium C++ ABI p3.3.5: | |||
2540 | /// | |||
2541 | /// After constructing a global (or local static) object, that will require | |||
2542 | /// destruction on exit, a termination function is registered as follows: | |||
2543 | /// | |||
2544 | /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); | |||
2545 | /// | |||
2546 | /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the | |||
2547 | /// call f(p) when DSO d is unloaded, before all such termination calls | |||
2548 | /// registered before this one. It returns zero if registration is | |||
2549 | /// successful, nonzero on failure. | |||
2550 | ||||
2551 | // This pass will look for calls to __cxa_atexit where the function is trivial | |||
2552 | // and remove them. | |||
2553 | bool Changed = false; | |||
2554 | ||||
2555 | for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end(); | |||
2556 | I != E;) { | |||
2557 | // We're only interested in calls. Theoretically, we could handle invoke | |||
2558 | // instructions as well, but neither llvm-gcc nor clang generate invokes | |||
2559 | // to __cxa_atexit. | |||
2560 | CallInst *CI = dyn_cast<CallInst>(*I++); | |||
2561 | if (!CI) | |||
2562 | continue; | |||
2563 | ||||
2564 | Function *DtorFn = | |||
2565 | dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); | |||
2566 | if (!DtorFn || !cxxDtorIsEmpty(*DtorFn)) | |||
2567 | continue; | |||
2568 | ||||
2569 | // Just remove the call. | |||
2570 | CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); | |||
2571 | CI->eraseFromParent(); | |||
2572 | ||||
2573 | ++NumCXXDtorsRemoved; | |||
2574 | ||||
2575 | Changed |= true; | |||
2576 | } | |||
2577 | ||||
2578 | return Changed; | |||
2579 | } | |||
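// A hedged sketch of the registration this removes. For the hypothetical
// Logger example above, the Itanium ABI lowering emits, roughly,
//
//   __cxa_atexit(&Logger::~Logger, &TheLogger, &__dso_handle);
//
// in the translation unit's static initializer. Once the destructor is proven
// empty, the call is deleted and any use of its return value is replaced by
// zero, matching the replaceAllUsesWith(Constant::getNullValue(...)) above.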
2580 | ||||
2581 | static bool optimizeGlobalsInModule( | |||
2582 | Module &M, const DataLayout &DL, | |||
2583 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, | |||
2584 | function_ref<TargetTransformInfo &(Function &)> GetTTI, | |||
2585 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, | |||
2586 | function_ref<DominatorTree &(Function &)> LookupDomTree) { | |||
2587 | SmallPtrSet<const Comdat *, 8> NotDiscardableComdats; | |||
2588 | bool Changed = false; | |||
2589 | bool LocalChange = true; | |||
2590 | while (LocalChange) { | |||
2591 | LocalChange = false; | |||
2592 | ||||
2593 | NotDiscardableComdats.clear(); | |||
2594 | for (const GlobalVariable &GV : M.globals()) | |||
2595 | if (const Comdat *C = GV.getComdat()) | |||
2596 | if (!GV.isDiscardableIfUnused() || !GV.use_empty()) | |||
2597 | NotDiscardableComdats.insert(C); | |||
2598 | for (Function &F : M) | |||
2599 | if (const Comdat *C = F.getComdat()) | |||
2600 | if (!F.isDefTriviallyDead()) | |||
2601 | NotDiscardableComdats.insert(C); | |||
2602 | for (GlobalAlias &GA : M.aliases()) | |||
2603 | if (const Comdat *C = GA.getComdat()) | |||
2604 | if (!GA.isDiscardableIfUnused() || !GA.use_empty()) | |||
2605 | NotDiscardableComdats.insert(C); | |||
2606 | ||||
2607 | // Delete functions that are trivially dead, ccc -> fastcc | |||
2608 | LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, | |||
2609 | NotDiscardableComdats); | |||
2610 | ||||
2611 | // Optimize global_ctors list. | |||
2612 | LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { | |||
2613 | return EvaluateStaticConstructor(F, DL, &GetTLI(*F)); | |||
2614 | }); | |||
2615 | ||||
2616 | // Optimize non-address-taken globals. | |||
2617 | LocalChange |= | |||
2618 | OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats); | |||
2619 | ||||
2620 | // Resolve aliases, when possible. | |||
2621 | LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); | |||
2622 | ||||
2623 | // Try to remove trivial global destructors if they are not removed | |||
2624 | // already. | |||
2625 | Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI); | |||
2626 | if (CXAAtExitFn) | |||
2627 | LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); | |||
2628 | ||||
2629 | Changed |= LocalChange; | |||
2630 | } | |||
2631 | ||||
2632 | // TODO: Move all global ctors functions to the end of the module for code | |||
2633 | // layout. | |||
2634 | ||||
2635 | return Changed; | |||
2636 | } | |||
2637 | ||||
2638 | PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { | |||
2639 | auto &DL = M.getDataLayout(); | |||
2640 | auto &FAM = | |||
2641 | AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | |||
2642 | auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ | |||
2643 | return FAM.getResult<DominatorTreeAnalysis>(F); | |||
2644 | }; | |||
2645 | auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { | |||
2646 | return FAM.getResult<TargetLibraryAnalysis>(F); | |||
2647 | }; | |||
2648 | auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { | |||
2649 | return FAM.getResult<TargetIRAnalysis>(F); | |||
2650 | }; | |||
2651 | ||||
2652 | auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { | |||
2653 | return FAM.getResult<BlockFrequencyAnalysis>(F); | |||
2654 | }; | |||
2655 | ||||
2656 | if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree)) | |||
2657 | return PreservedAnalyses::all(); | |||
2658 | return PreservedAnalyses::none(); | |||
2659 | } | |||
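// Minimal usage sketch (not part of this file): with the new pass manager the
// pass is added to a ModulePassManager like any other module pass, assuming a
// ModuleAnalysisManager MAM that already has the function analyses and proxies
// registered (e.g. via PassBuilder):
//
//   llvm::ModulePassManager MPM;
//   MPM.addPass(llvm::GlobalOptPass());
//   MPM.run(M, MAM);   // M is the llvm::Module being optimized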
2660 | ||||
2661 | namespace { | |||
2662 | ||||
2663 | struct GlobalOptLegacyPass : public ModulePass { | |||
2664 | static char ID; // Pass identification, replacement for typeid | |||
2665 | ||||
2666 | GlobalOptLegacyPass() : ModulePass(ID) { | |||
2667 | initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry()); | |||
2668 | } | |||
2669 | ||||
2670 | bool runOnModule(Module &M) override { | |||
2671 | if (skipModule(M)) | |||
2672 | return false; | |||
2673 | ||||
2674 | auto &DL = M.getDataLayout(); | |||
2675 | auto LookupDomTree = [this](Function &F) -> DominatorTree & { | |||
2676 | return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); | |||
2677 | }; | |||
2678 | auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { | |||
2679 | return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); | |||
2680 | }; | |||
2681 | auto GetTTI = [this](Function &F) -> TargetTransformInfo & { | |||
2682 | return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); | |||
2683 | }; | |||
2684 | ||||
2685 | auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { | |||
2686 | return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); | |||
2687 | }; | |||
2688 | ||||
2689 | return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, | |||
2690 | LookupDomTree); | |||
2691 | } | |||
2692 | ||||
2693 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
2694 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
2695 | AU.addRequired<TargetTransformInfoWrapperPass>(); | |||
2696 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
2697 | AU.addRequired<BlockFrequencyInfoWrapperPass>(); | |||
2698 | } | |||
2699 | }; | |||
2700 | ||||
2701 | } // end anonymous namespace | |||
2702 | ||||
2703 | char GlobalOptLegacyPass::ID = 0; | |||
2704 | ||||
2705 | INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt", | |||
2706 |                       "Global Variable Optimizer", false, false) | |||
2707 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) | |||
2708 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) | |||
2709 | INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) | |||
2710 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) | |||
2711 | INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt", | |||
2712 |                     "Global Variable Optimizer", false, false) | |||
2713 | ||||
2714 | ModulePass *llvm::createGlobalOptimizerPass() { | |||
2715 | return new GlobalOptLegacyPass(); | |||
2716 | } |
1 | //===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the SmallVector class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_SMALLVECTOR_H |
14 | #define LLVM_ADT_SMALLVECTOR_H |
15 | |
16 | #include "llvm/ADT/iterator_range.h" |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/ErrorHandling.h" |
19 | #include "llvm/Support/MemAlloc.h" |
20 | #include "llvm/Support/type_traits.h" |
21 | #include <algorithm> |
22 | #include <cassert> |
23 | #include <cstddef> |
24 | #include <cstdlib> |
25 | #include <cstring> |
26 | #include <functional> |
27 | #include <initializer_list> |
28 | #include <iterator> |
29 | #include <limits> |
30 | #include <memory> |
31 | #include <new> |
32 | #include <type_traits> |
33 | #include <utility> |
34 | |
35 | namespace llvm { |
36 | |
37 | /// This is all the stuff common to all SmallVectors. |
38 | /// |
39 | /// The template parameter specifies the type which should be used to hold the |
40 | /// Size and Capacity of the SmallVector, so it can be adjusted. |
41 | /// Using 32 bit size is desirable to shrink the size of the SmallVector. |
42 | /// Using 64 bit size is desirable for cases like SmallVector<char>, where a |
43 | /// 32 bit size would limit the vector to ~4GB. SmallVectors are used for |
44 | /// buffering bitcode output - which can exceed 4GB. |
45 | template <class Size_T> class SmallVectorBase { |
46 | protected: |
47 | void *BeginX; |
48 | Size_T Size = 0, Capacity; |
49 | |
50 | /// The maximum value of the Size_T used. |
51 | static constexpr size_t SizeTypeMax() { |
52 | return std::numeric_limits<Size_T>::max(); |
53 | } |
54 | |
55 | SmallVectorBase() = delete; |
56 | SmallVectorBase(void *FirstEl, size_t TotalCapacity) |
57 | : BeginX(FirstEl), Capacity(TotalCapacity) {} |
58 | |
59 | /// This is a helper for \a grow() that's out of line to reduce code |
60 | /// duplication. This function will report a fatal error if it can't grow at |
61 | /// least to \p MinSize. |
62 | void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity); |
63 | |
64 | /// This is an implementation of the grow() method which only works |
65 | /// on POD-like data types and is out of line to reduce code duplication. |
66 | /// This function will report a fatal error if it cannot increase capacity. |
67 | void grow_pod(void *FirstEl, size_t MinSize, size_t TSize); |
68 | |
69 | public: |
70 | size_t size() const { return Size; } |
71 | size_t capacity() const { return Capacity; } |
72 | |
73 | LLVM_NODISCARD bool empty() const { return !Size; } |
74 | |
75 | /// Set the array size to \p N, which the current array must have enough |
76 | /// capacity for. |
77 | /// |
78 | /// This does not construct or destroy any elements in the vector. |
79 | /// |
80 | /// Clients can use this in conjunction with capacity() to write past the end |
81 | /// of the buffer when they know that more elements are available, and only |
82 | /// update the size later. This avoids the cost of value initializing elements |
83 | /// which will only be overwritten. |
84 | void set_size(size_t N) { |
85 | assert(N <= capacity()); |
86 | Size = N; |
87 | } |
88 | }; |
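// A short sketch of the write-past-the-end pattern described in the set_size()
// comment above, assuming a hypothetical fillBuffer() that writes raw bytes
// into a buffer and returns how many it produced:
//
//   llvm::SmallVector<char, 128> Buf;
//   Buf.reserve(512);                                   // ensure capacity
//   size_t N = fillBuffer(Buf.data(), Buf.capacity());  // write past size()
//   Buf.set_size(N);                                    // then publish the size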
89 | |
90 | template <class T> |
91 | using SmallVectorSizeType = |
92 | typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t, |
93 | uint32_t>::type; |
94 | |
95 | /// Figure out the offset of the first element. |
96 | template <class T, typename = void> struct SmallVectorAlignmentAndSize { |
97 | alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof( |
98 | SmallVectorBase<SmallVectorSizeType<T>>)]; |
99 | alignas(T) char FirstEl[sizeof(T)]; |
100 | }; |
101 | |
102 | /// This is the part of SmallVectorTemplateBase which does not depend on whether |
103 | /// the type T is a POD. The extra dummy template argument is used by ArrayRef |
104 | /// to avoid unnecessarily requiring T to be complete. |
105 | template <typename T, typename = void> |
106 | class SmallVectorTemplateCommon |
107 | : public SmallVectorBase<SmallVectorSizeType<T>> { |
108 | using Base = SmallVectorBase<SmallVectorSizeType<T>>; |
109 | |
110 | /// Find the address of the first element. For this pointer math to be valid |
111 | /// with small-size of 0 for T with lots of alignment, it's important that |
112 | /// SmallVectorStorage is properly-aligned even for small-size of 0. |
113 | void *getFirstEl() const { |
114 | return const_cast<void *>(reinterpret_cast<const void *>( |
115 | reinterpret_cast<const char *>(this) + |
116 | offsetof(SmallVectorAlignmentAndSize<T>, FirstEl))); |
117 | } |
118 | // Space after 'FirstEl' is clobbered, do not add any instance vars after it. |
119 | |
120 | protected: |
121 | SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {} |
122 | |
123 | void grow_pod(size_t MinSize, size_t TSize) { |
124 | Base::grow_pod(getFirstEl(), MinSize, TSize); |
125 | } |
126 | |
127 | /// Return true if this is a smallvector which has not had dynamic |
128 | /// memory allocated for it. |
129 | bool isSmall() const { return this->BeginX == getFirstEl(); } |
130 | |
131 | /// Put this vector in a state of being small. |
132 | void resetToSmall() { |
133 | this->BeginX = getFirstEl(); |
134 | this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect. |
135 | } |
136 | |
137 | /// Return true if V is an internal reference to the given range. |
138 | bool isReferenceToRange(const void *V, const void *First, const void *Last) const { |
139 | // Use std::less to avoid UB. |
140 | std::less<> LessThan; |
141 | return !LessThan(V, First) && LessThan(V, Last); |
142 | } |
143 | |
144 | /// Return true if V is an internal reference to this vector. |
145 | bool isReferenceToStorage(const void *V) const { |
146 | return isReferenceToRange(V, this->begin(), this->end()); |
147 | } |
148 | |
149 | /// Return true if First and Last form a valid (possibly empty) range in this |
150 | /// vector's storage. |
151 | bool isRangeInStorage(const void *First, const void *Last) const { |
152 | // Use std::less to avoid UB. |
153 | std::less<> LessThan; |
154 | return !LessThan(First, this->begin()) && !LessThan(Last, First) && |
155 | !LessThan(this->end(), Last); |
156 | } |
157 | |
158 | /// Return true unless Elt will be invalidated by resizing the vector to |
159 | /// NewSize. |
160 | bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) { |
161 | // Past the end. |
162 | if (LLVM_LIKELY(!isReferenceToStorage(Elt))) |
163 | return true; |
164 | |
165 | // Return false if Elt will be destroyed by shrinking. |
166 | if (NewSize <= this->size()) |
167 | return Elt < this->begin() + NewSize; |
168 | |
169 | // Return false if we need to grow. |
170 | return NewSize <= this->capacity(); |
171 | } |
172 | |
173 | /// Check whether Elt will be invalidated by resizing the vector to NewSize. |
174 | void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) { |
175 | assert(isSafeToReferenceAfterResize(Elt, NewSize) && |
176 |        "Attempting to reference an element of the vector in an operation " |
177 |        "that invalidates it"); |
178 | } |
179 | |
180 | /// Check whether Elt will be invalidated by increasing the size of the |
181 | /// vector by N. |
182 | void assertSafeToAdd(const void *Elt, size_t N = 1) { |
183 | this->assertSafeToReferenceAfterResize(Elt, this->size() + N); |
184 | } |
185 | |
186 | /// Check whether any part of the range will be invalidated by clearing. |
187 | void assertSafeToReferenceAfterClear(const T *From, const T *To) { |
188 | if (From == To) |
189 | return; |
190 | this->assertSafeToReferenceAfterResize(From, 0); |
191 | this->assertSafeToReferenceAfterResize(To - 1, 0); |
192 | } |
193 | template < |
194 | class ItTy, |
195 | std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value, |
196 | bool> = false> |
197 | void assertSafeToReferenceAfterClear(ItTy, ItTy) {} |
198 | |
199 | /// Check whether any part of the range will be invalidated by growing. |
200 | void assertSafeToAddRange(const T *From, const T *To) { |
201 | if (From == To) |
202 | return; |
203 | this->assertSafeToAdd(From, To - From); |
204 | this->assertSafeToAdd(To - 1, To - From); |
205 | } |
206 | template < |
207 | class ItTy, |
208 | std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value, |
209 | bool> = false> |
210 | void assertSafeToAddRange(ItTy, ItTy) {} |
211 | |
212 | /// Reserve enough space to add one element, and return the updated element |
213 | /// pointer in case it was a reference to the storage. |
214 | template <class U> |
215 | static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt, |
216 | size_t N) { |
217 | size_t NewSize = This->size() + N; |
218 | if (LLVM_LIKELY(NewSize <= This->capacity())) |
219 | return &Elt; |
220 | |
221 | bool ReferencesStorage = false; |
222 | int64_t Index = -1; |
223 | if (!U::TakesParamByValue) { |
224 | if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))) { |
225 | ReferencesStorage = true; |
226 | Index = &Elt - This->begin(); |
227 | } |
228 | } |
229 | This->grow(NewSize); |
230 | return ReferencesStorage ? This->begin() + Index : &Elt; |
231 | } |
232 | |
233 | public: |
234 | using size_type = size_t; |
235 | using difference_type = ptrdiff_t; |
236 | using value_type = T; |
237 | using iterator = T *; |
238 | using const_iterator = const T *; |
239 | |
240 | using const_reverse_iterator = std::reverse_iterator<const_iterator>; |
241 | using reverse_iterator = std::reverse_iterator<iterator>; |
242 | |
243 | using reference = T &; |
244 | using const_reference = const T &; |
245 | using pointer = T *; |
246 | using const_pointer = const T *; |
247 | |
248 | using Base::capacity; |
249 | using Base::empty; |
250 | using Base::size; |
251 | |
252 | // forward iterator creation methods. |
253 | iterator begin() { return (iterator)this->BeginX; } |
254 | const_iterator begin() const { return (const_iterator)this->BeginX; } |
255 | iterator end() { return begin() + size(); } |
256 | const_iterator end() const { return begin() + size(); } |
257 | |
258 | // reverse iterator creation methods. |
259 | reverse_iterator rbegin() { return reverse_iterator(end()); } |
260 | const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } |
261 | reverse_iterator rend() { return reverse_iterator(begin()); } |
262 | const_reverse_iterator rend() const { return const_reverse_iterator(begin());} |
263 | |
264 | size_type size_in_bytes() const { return size() * sizeof(T); } |
265 | size_type max_size() const { |
266 | return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T)); |
267 | } |
268 | |
269 | size_t capacity_in_bytes() const { return capacity() * sizeof(T); } |
270 | |
271 | /// Return a pointer to the vector's buffer, even if empty(). |
272 | pointer data() { return pointer(begin()); } |
273 | /// Return a pointer to the vector's buffer, even if empty(). |
274 | const_pointer data() const { return const_pointer(begin()); } |
275 | |
276 | reference operator[](size_type idx) { |
277 | assert(idx < size()); |
278 | return begin()[idx]; |
279 | } |
280 | const_reference operator[](size_type idx) const { |
281 | assert(idx < size()); |
282 | return begin()[idx]; |
283 | } |
284 | |
285 | reference front() { |
286 | assert(!empty()); |
287 | return begin()[0]; |
288 | } |
289 | const_reference front() const { |
290 | assert(!empty()); |
291 | return begin()[0]; |
292 | } |
293 | |
294 | reference back() { |
295 | assert(!empty()); |
296 | return end()[-1]; |
297 | } |
298 | const_reference back() const { |
299 | assert(!empty()); |
300 | return end()[-1]; |
301 | } |
302 | }; |
303 | |
304 | /// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put |
305 | /// method implementations that are designed to work with non-trivial T's. |
306 | /// |
307 | /// We approximate is_trivially_copyable with trivial move/copy construction and |
308 | /// trivial destruction. While the standard doesn't specify that you're allowed to |
309 | /// copy these types with memcpy, there is no way for the type to observe this. |
310 | /// This catches the important case of std::pair<POD, POD>, which is not |
311 | /// trivially assignable. |
312 | template <typename T, bool = (is_trivially_copy_constructible<T>::value) && |
313 | (is_trivially_move_constructible<T>::value) && |
314 | std::is_trivially_destructible<T>::value> |
315 | class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { |
316 | friend class SmallVectorTemplateCommon<T>; |
317 | |
318 | protected: |
319 | static constexpr bool TakesParamByValue = false; |
320 | using ValueParamT = const T &; |
321 | |
322 | SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} |
323 | |
324 | static void destroy_range(T *S, T *E) { |
325 | while (S != E) { |
326 | --E; |
327 | E->~T(); |
328 | } |
329 | } |
330 | |
331 | /// Move the range [I, E) into the uninitialized memory starting with "Dest", |
332 | /// constructing elements as needed. |
333 | template<typename It1, typename It2> |
334 | static void uninitialized_move(It1 I, It1 E, It2 Dest) { |
335 | std::uninitialized_copy(std::make_move_iterator(I), |
336 | std::make_move_iterator(E), Dest); |
337 | } |
338 | |
339 | /// Copy the range [I, E) onto the uninitialized memory starting with "Dest", |
340 | /// constructing elements as needed. |
341 | template<typename It1, typename It2> |
342 | static void uninitialized_copy(It1 I, It1 E, It2 Dest) { |
343 | std::uninitialized_copy(I, E, Dest); |
344 | } |
345 | |
346 | /// Grow the allocated memory (without initializing new elements), doubling |
347 | /// the size of the allocated memory. Guarantees space for at least one more |
348 | /// element, or MinSize more elements if specified. |
349 | void grow(size_t MinSize = 0); |
350 | |
351 | /// Create a new allocation big enough for \p MinSize and pass back its size |
352 | /// in \p NewCapacity. This is the first section of \a grow(). |
353 | T *mallocForGrow(size_t MinSize, size_t &NewCapacity) { |
354 | return static_cast<T *>( |
355 | SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow( |
356 | MinSize, sizeof(T), NewCapacity)); |
357 | } |
358 | |
359 | /// Move existing elements over to the new allocation \p NewElts, the middle |
360 | /// section of \a grow(). |
361 | void moveElementsForGrow(T *NewElts); |
362 | |
363 | /// Transfer ownership of the allocation, finishing up \a grow(). |
364 | void takeAllocationForGrow(T *NewElts, size_t NewCapacity); |
365 | |
366 | /// Reserve enough space to add one element, and return the updated element |
367 | /// pointer in case it was a reference to the storage. |
368 | const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { |
369 | return this->reserveForParamAndGetAddressImpl(this, Elt, N); |
370 | } |
371 | |
372 | /// Reserve enough space to add one element, and return the updated element |
373 | /// pointer in case it was a reference to the storage. |
374 | T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { |
375 | return const_cast<T *>( |
376 | this->reserveForParamAndGetAddressImpl(this, Elt, N)); |
377 | } |
378 | |
379 | static T &&forward_value_param(T &&V) { return std::move(V); } |
380 | static const T &forward_value_param(const T &V) { return V; } |
381 | |
382 | void growAndAssign(size_t NumElts, const T &Elt) { |
383 | // Grow manually in case Elt is an internal reference. |
384 | size_t NewCapacity; |
385 | T *NewElts = mallocForGrow(NumElts, NewCapacity); |
386 | std::uninitialized_fill_n(NewElts, NumElts, Elt); |
387 | this->destroy_range(this->begin(), this->end()); |
388 | takeAllocationForGrow(NewElts, NewCapacity); |
389 | this->set_size(NumElts); |
390 | } |
391 | |
392 | template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { |
393 | // Grow manually in case one of Args is an internal reference. |
394 | size_t NewCapacity; |
395 | T *NewElts = mallocForGrow(0, NewCapacity); |
396 | ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...); |
397 | moveElementsForGrow(NewElts); |
398 | takeAllocationForGrow(NewElts, NewCapacity); |
399 | this->set_size(this->size() + 1); |
400 | return this->back(); |
401 | } |
402 | |
403 | public: |
404 | void push_back(const T &Elt) { |
405 | const T *EltPtr = reserveForParamAndGetAddress(Elt); |
406 | ::new ((void *)this->end()) T(*EltPtr); |
407 | this->set_size(this->size() + 1); |
408 | } |
409 | |
410 | void push_back(T &&Elt) { |
411 | T *EltPtr = reserveForParamAndGetAddress(Elt); |
412 | ::new ((void *)this->end()) T(::std::move(*EltPtr)); |
413 | this->set_size(this->size() + 1); |
414 | } |
415 | |
416 | void pop_back() { |
417 | this->set_size(this->size() - 1); |
418 | this->end()->~T(); |
419 | } |
420 | }; |
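// A small sketch of what the reserveForParamAndGetAddress() machinery above
// buys callers: pushing an element of a vector into that same vector remains
// well-defined even when push_back() must grow, because the element's address
// is recomputed after the reallocation.
//
//   llvm::SmallVector<std::string, 2> V = {"a", "b"};
//   V.push_back(V[0]);   // safe even if this push_back reallocates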
421 | |
422 | // Define this out-of-line to dissuade the C++ compiler from inlining it. |
423 | template <typename T, bool TriviallyCopyable> |
424 | void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) { |
425 | size_t NewCapacity; |
426 | T *NewElts = mallocForGrow(MinSize, NewCapacity); |
427 | moveElementsForGrow(NewElts); |
428 | takeAllocationForGrow(NewElts, NewCapacity); |
429 | } |
430 | |
431 | // Define this out-of-line to dissuade the C++ compiler from inlining it. |
432 | template <typename T, bool TriviallyCopyable> |
433 | void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow( |
434 | T *NewElts) { |
435 | // Move the elements over. |
436 | this->uninitialized_move(this->begin(), this->end(), NewElts); |
437 | |
438 | // Destroy the original elements. |
439 | destroy_range(this->begin(), this->end()); |
440 | } |
441 | |
442 | // Define this out-of-line to dissuade the C++ compiler from inlining it. |
443 | template <typename T, bool TriviallyCopyable> |
444 | void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow( |
445 | T *NewElts, size_t NewCapacity) { |
446 | // If this wasn't grown from the inline copy, deallocate the old space. |
447 | if (!this->isSmall()) |
448 | free(this->begin()); |
449 | |
450 | this->BeginX = NewElts; |
451 | this->Capacity = NewCapacity; |
452 | } |
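// Note on the three-phase grow above: keeping mallocForGrow(),
// moveElementsForGrow() and takeAllocationForGrow() separate lets
// growAndEmplaceBack() construct the new element into the fresh allocation
// before the old elements are moved, so an argument that aliases the current
// storage is still read while it is valid.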
453 | |
454 | /// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put |
455 | /// method implementations that are designed to work with trivially copyable |
456 | /// T's. This allows using memcpy in place of copy/move construction and |
457 | /// skipping destruction. |
458 | template <typename T> |
459 | class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { |
460 | friend class SmallVectorTemplateCommon<T>; |
461 | |
462 | protected: |
463 | /// True if it's cheap enough to take parameters by value. Doing so avoids |
464 | /// overhead related to mitigations for reference invalidation. |
465 | static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *); |
466 | |
467 | /// Either const T& or T, depending on whether it's cheap enough to take |
468 | /// parameters by value. |
469 | using ValueParamT = |
470 | typename std::conditional<TakesParamByValue, T, const T &>::type; |
471 | |
472 | SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} |
473 | |
474 | // No need to do a destroy loop for POD's. |
475 | static void destroy_range(T *, T *) {} |
476 | |
477 | /// Move the range [I, E) onto the uninitialized memory |
478 | /// starting with "Dest", constructing elements into it as needed. |
479 | template<typename It1, typename It2> |
480 | static void uninitialized_move(It1 I, It1 E, It2 Dest) { |
481 | // Just do a copy. |
482 | uninitialized_copy(I, E, Dest); |
483 | } |
484 | |
485 | /// Copy the range [I, E) onto the uninitialized memory |
486 | /// starting with "Dest", constructing elements into it as needed. |
487 | template<typename It1, typename It2> |
488 | static void uninitialized_copy(It1 I, It1 E, It2 Dest) { |
489 | // Arbitrary iterator types; just use the basic implementation. |
490 | std::uninitialized_copy(I, E, Dest); |
491 | } |
492 | |
493 | /// Copy the range [I, E) onto the uninitialized memory |
494 | /// starting with "Dest", constructing elements into it as needed. |
495 | template <typename T1, typename T2> |
496 | static void uninitialized_copy( |
497 | T1 *I, T1 *E, T2 *Dest, |
498 | std::enable_if_t<std::is_same<typename std::remove_const<T1>::type, |
499 | T2>::value> * = nullptr) { |
500 | // Use memcpy for PODs iterated by pointers (which includes SmallVector |
501 | // iterators): std::uninitialized_copy optimizes to memmove, but we can |
502 | // use memcpy here. Note that I and E are iterators and thus might be |
503 | // invalid for memcpy if they are equal. |
504 | if (I != E) |
505 | memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T)); |
506 | } |
507 | |
508 | /// Double the size of the allocated memory, guaranteeing space for at |
509 | /// least one more element or MinSize if specified. |
510 | void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); } |
511 | |
512 | /// Reserve enough space to add one element, and return the updated element |
513 | /// pointer in case it was a reference to the storage. |
514 | const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { |
515 | return this->reserveForParamAndGetAddressImpl(this, Elt, N); |
516 | } |
517 | |
518 | /// Reserve enough space to add one element, and return the updated element |
519 | /// pointer in case it was a reference to the storage. |
520 | T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { |
521 | return const_cast<T *>( |
522 | this->reserveForParamAndGetAddressImpl(this, Elt, N)); |
523 | } |
524 | |
525 | /// Copy \p V or return a reference, depending on \a ValueParamT. |
526 | static ValueParamT forward_value_param(ValueParamT V) { return V; } |
527 | |
528 | void growAndAssign(size_t NumElts, T Elt) { |
529 | // Elt has been copied in case it's an internal reference, side-stepping |
530 | // reference invalidation problems without losing the realloc optimization. |
531 | this->set_size(0); |
532 | this->grow(NumElts); |
533 | std::uninitialized_fill_n(this->begin(), NumElts, Elt); |
534 | this->set_size(NumElts); |
535 | } |
536 | |
537 | template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { |
538 | // Use push_back with a copy in case Args has an internal reference, |
539 | // side-stepping reference invalidation problems without losing the realloc |
540 | // optimization. |
541 | push_back(T(std::forward<ArgTypes>(Args)...)); |
542 | return this->back(); |
543 | } |
544 | |
545 | public: |
546 | void push_back(ValueParamT Elt) { |
547 | const T *EltPtr = reserveForParamAndGetAddress(Elt); |
548 | memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T)); |
549 | this->set_size(this->size() + 1); |
550 | } |
551 | |
552 | void pop_back() { this->set_size(this->size() - 1); } |
553 | }; |
554 | |
555 | /// This class consists of common code factored out of the SmallVector class to |
556 | /// reduce code duplication based on the SmallVector 'N' template parameter. |
557 | template <typename T> |
558 | class SmallVectorImpl : public SmallVectorTemplateBase<T> { |
559 | using SuperClass = SmallVectorTemplateBase<T>; |
560 | |
561 | public: |
562 | using iterator = typename SuperClass::iterator; |
563 | using const_iterator = typename SuperClass::const_iterator; |
564 | using reference = typename SuperClass::reference; |
565 | using size_type = typename SuperClass::size_type; |
566 | |
567 | protected: |
568 | using SmallVectorTemplateBase<T>::TakesParamByValue; |
569 | using ValueParamT = typename SuperClass::ValueParamT; |
570 | |
571 | // Default ctor - Initialize to empty. |
572 | explicit SmallVectorImpl(unsigned N) |
573 | : SmallVectorTemplateBase<T>(N) {} |
574 | |
575 | public: |
576 | SmallVectorImpl(const SmallVectorImpl &) = delete; |
577 | |
578 | ~SmallVectorImpl() { |
579 | // Subclass has already destructed this vector's elements. |
580 | // If this wasn't grown from the inline copy, deallocate the old space. |
581 | if (!this->isSmall()) |
582 | free(this->begin()); |
583 | } |
584 | |
585 | void clear() { |
586 | this->destroy_range(this->begin(), this->end()); |
587 | this->Size = 0; |
588 | } |
589 | |
590 | private: |
591 | template <bool ForOverwrite> void resizeImpl(size_type N) { |
592 | if (N < this->size()) { |
593 | this->pop_back_n(this->size() - N); |
594 | } else if (N > this->size()) { |
595 | this->reserve(N); |
596 | for (auto I = this->end(), E = this->begin() + N; I != E; ++I) |
597 | if (ForOverwrite) |
598 | new (&*I) T; |
599 | else |
600 | new (&*I) T(); |
601 | this->set_size(N); |
602 | } |
603 | } |
604 | |
605 | public: |
606 | void resize(size_type N) { resizeImpl<false>(N); } |
607 | |
608 | /// Like resize, but \ref T is POD, so the new values won't be initialized. |
609 | void resize_for_overwrite(size_type N) { resizeImpl<true>(N); } |
610 | |
611 | void resize(size_type N, ValueParamT NV) { |
612 | if (N == this->size()) |
613 | return; |
614 | |
615 | if (N < this->size()) { |
616 | this->pop_back_n(this->size() - N); |
617 | return; |
618 | } |
619 | |
620 | // N > this->size(). Defer to append. |
621 | this->append(N - this->size(), NV); |
622 | } |
623 | |
624 | void reserve(size_type N) { |
625 | if (this->capacity() < N) |
626 | this->grow(N); |
627 | } |
628 | |
629 | void pop_back_n(size_type NumItems) { |
630 | assert(this->size() >= NumItems); |
631 | this->destroy_range(this->end() - NumItems, this->end()); |
632 | this->set_size(this->size() - NumItems); |
633 | } |
634 | |
635 | LLVM_NODISCARD T pop_back_val() { |
636 | T Result = ::std::move(this->back()); |
637 | this->pop_back(); |
638 | return Result; |
639 | } |
640 | |
641 | void swap(SmallVectorImpl &RHS); |
642 | |
643 | /// Add the specified range to the end of the SmallVector. |
644 | template <typename in_iter, |
645 | typename = std::enable_if_t<std::is_convertible< |
646 | typename std::iterator_traits<in_iter>::iterator_category, |
647 | std::input_iterator_tag>::value>> |
648 | void append(in_iter in_start, in_iter in_end) { |
649 | this->assertSafeToAddRange(in_start, in_end); |
650 | size_type NumInputs = std::distance(in_start, in_end); |
651 | this->reserve(this->size() + NumInputs); |
652 | this->uninitialized_copy(in_start, in_end, this->end()); |
653 | this->set_size(this->size() + NumInputs); |
654 | } |
655 | |
656 | /// Append \p NumInputs copies of \p Elt to the end. |
657 | void append(size_type NumInputs, ValueParamT Elt) { |
658 | const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs); |
659 | std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr); |
660 | this->set_size(this->size() + NumInputs); |
661 | } |
662 | |
663 | void append(std::initializer_list<T> IL) { |
664 | append(IL.begin(), IL.end()); |
665 | } |
666 | |
667 | void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); } |
668 | |
669 | void assign(size_type NumElts, ValueParamT Elt) { |
670 | // Note that Elt could be an internal reference. |
671 | if (NumElts > this->capacity()) { |
672 | this->growAndAssign(NumElts, Elt); |
673 | return; |
674 | } |
675 | |
676 | // Assign over existing elements. |
677 | std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt); |
678 | if (NumElts > this->size()) |
679 | std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt); |
680 | else if (NumElts < this->size()) |
681 | this->destroy_range(this->begin() + NumElts, this->end()); |
682 | this->set_size(NumElts); |
683 | } |
684 | |
685 | // FIXME: Consider assigning over existing elements, rather than clearing & |
686 | // re-initializing them - for all assign(...) variants. |
687 | |
688 | template <typename in_iter, |
689 | typename = std::enable_if_t<std::is_convertible< |
690 | typename std::iterator_traits<in_iter>::iterator_category, |
691 | std::input_iterator_tag>::value>> |
692 | void assign(in_iter in_start, in_iter in_end) { |
693 | this->assertSafeToReferenceAfterClear(in_start, in_end); |
694 | clear(); |
695 | append(in_start, in_end); |
696 | } |
697 | |
698 | void assign(std::initializer_list<T> IL) { |
699 | clear(); |
700 | append(IL); |
701 | } |
702 | |
703 | void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); } |
704 | |
705 | iterator erase(const_iterator CI) { |
706 | // Just cast away constness because this is a non-const member function. |
707 | iterator I = const_cast<iterator>(CI); |
708 | |
709 | assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds."); |
710 | |
711 | iterator N = I; |
712 | // Shift all elts down one. |
713 | std::move(I+1, this->end(), I); |
714 | // Drop the last elt. |
715 | this->pop_back(); |
716 | return(N); |
717 | } |
718 | |
719 | iterator erase(const_iterator CS, const_iterator CE) { |
720 | // Just cast away constness because this is a non-const member function. |
721 | iterator S = const_cast<iterator>(CS); |
722 | iterator E = const_cast<iterator>(CE); |
723 | |
724 | assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds."); |
725 | |
726 | iterator N = S; |
727 | // Shift all elts down. |
728 | iterator I = std::move(E, this->end(), S); |
729 | // Drop the last elts. |
730 | this->destroy_range(I, this->end()); |
731 | this->set_size(I - this->begin()); |
732 | return(N); |
733 | } |
734 | |
735 | private: |
736 | template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) { |
737 | // Callers ensure that ArgType is derived from T. |
738 | static_assert( |
739 | std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>, |
740 | T>::value, |
741 | "ArgType must be derived from T!"); |
742 | |
743 | if (I == this->end()) { // Important special case for empty vector. |
744 | this->push_back(::std::forward<ArgType>(Elt)); |
745 | return this->end()-1; |
746 | } |
747 | |
748 | assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); |
749 | |
750 | // Grow if necessary. |
751 | size_t Index = I - this->begin(); |
752 | std::remove_reference_t<ArgType> *EltPtr = |
753 | this->reserveForParamAndGetAddress(Elt); |
754 | I = this->begin() + Index; |
755 | |
756 | ::new ((void*) this->end()) T(::std::move(this->back())); |
757 | // Push everything else over. |
758 | std::move_backward(I, this->end()-1, this->end()); |
759 | this->set_size(this->size() + 1); |
760 | |
761 | // If we just moved the element we're inserting, be sure to update |
762 | // the reference (never happens if TakesParamByValue). |
763 | static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value, |
764 | "ArgType must be 'T' when taking by value!"); |
765 | if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end())) |
766 | ++EltPtr; |
767 | |
768 | *I = ::std::forward<ArgType>(*EltPtr); |
769 | return I; |
770 | } |
771 | |
772 | public: |
773 | iterator insert(iterator I, T &&Elt) { |
774 | return insert_one_impl(I, this->forward_value_param(std::move(Elt))); |
775 | } |
776 | |
777 | iterator insert(iterator I, const T &Elt) { |
778 | return insert_one_impl(I, this->forward_value_param(Elt)); |
779 | } |
780 | |
781 | iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) { |
782 | // Convert iterator to elt# to avoid invalidating iterator when we reserve() |
783 | size_t InsertElt = I - this->begin(); |
784 | |
785 | if (I == this->end()) { // Important special case for empty vector. |
786 | append(NumToInsert, Elt); |
787 | return this->begin()+InsertElt; |
788 | } |
789 | |
790 | assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); |
791 | |
792 | // Ensure there is enough space, and get the (maybe updated) address of |
793 | // Elt. |
794 | const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert); |
795 | |
796 | // Uninvalidate the iterator. |
797 | I = this->begin()+InsertElt; |
798 | |
799 | // If there are more elements between the insertion point and the end of the |
800 | // range than there are being inserted, we can use a simple approach to |
801 | // insertion. Since we already reserved space, we know that this won't |
802 | // reallocate the vector. |
803 | if (size_t(this->end()-I) >= NumToInsert) { |
804 | T *OldEnd = this->end(); |
805 | append(std::move_iterator<iterator>(this->end() - NumToInsert), |
806 | std::move_iterator<iterator>(this->end())); |
807 | |
808 | // Copy the existing elements that get replaced. |
809 | std::move_backward(I, OldEnd-NumToInsert, OldEnd); |
810 | |
811 | // If we just moved the element we're inserting, be sure to update |
812 | // the reference (never happens if TakesParamByValue). |
813 | if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) |
814 | EltPtr += NumToInsert; |
815 | |
816 | std::fill_n(I, NumToInsert, *EltPtr); |
817 | return I; |
818 | } |
819 | |
820 | // Otherwise, we're inserting more elements than exist already, and we're |
821 | // not inserting at the end. |
822 | |
823 | // Move over the elements that we're about to overwrite. |
824 | T *OldEnd = this->end(); |
825 | this->set_size(this->size() + NumToInsert); |
826 | size_t NumOverwritten = OldEnd-I; |
827 | this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); |
828 | |
829 | // If we just moved the element we're inserting, be sure to update |
830 | // the reference (never happens if TakesParamByValue). |
831 | if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) |
832 | EltPtr += NumToInsert; |
833 | |
834 | // Replace the overwritten part. |
835 | std::fill_n(I, NumOverwritten, *EltPtr); |
836 | |
837 | // Insert the non-overwritten middle part. |
838 | std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr); |
839 | return I; |
840 | } |
841 | |
842 | template <typename ItTy, |
843 | typename = std::enable_if_t<std::is_convertible< |
844 | typename std::iterator_traits<ItTy>::iterator_category, |
845 | std::input_iterator_tag>::value>> |
846 | iterator insert(iterator I, ItTy From, ItTy To) { |
847 | // Convert iterator to elt# to avoid invalidating iterator when we reserve() |
848 | size_t InsertElt = I - this->begin(); |
849 | |
850 | if (I == this->end()) { // Important special case for empty vector. |
851 | append(From, To); |
852 | return this->begin()+InsertElt; |
853 | } |
854 | |
855 | assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); |
856 | |
857 | // Check that the reserve that follows doesn't invalidate the iterators. |
858 | this->assertSafeToAddRange(From, To); |
859 | |
860 | size_t NumToInsert = std::distance(From, To); |
861 | |
862 | // Ensure there is enough space. |
863 | reserve(this->size() + NumToInsert); |
864 | |
865 | // Uninvalidate the iterator. |
866 | I = this->begin()+InsertElt; |
867 | |
868 | // If there are more elements between the insertion point and the end of the |
869 | // range than there are being inserted, we can use a simple approach to |
870 | // insertion. Since we already reserved space, we know that this won't |
871 | // reallocate the vector. |
872 | if (size_t(this->end()-I) >= NumToInsert) { |
873 | T *OldEnd = this->end(); |
874 | append(std::move_iterator<iterator>(this->end() - NumToInsert), |
875 | std::move_iterator<iterator>(this->end())); |
876 | |
877 | // Copy the existing elements that get replaced. |
878 | std::move_backward(I, OldEnd-NumToInsert, OldEnd); |
879 | |
880 | std::copy(From, To, I); |
881 | return I; |
882 | } |
883 | |
884 | // Otherwise, we're inserting more elements than exist already, and we're |
885 | // not inserting at the end. |
886 | |
887 | // Move over the elements that we're about to overwrite. |
888 | T *OldEnd = this->end(); |
889 | this->set_size(this->size() + NumToInsert); |
890 | size_t NumOverwritten = OldEnd-I; |
891 | this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); |
892 | |
893 | // Replace the overwritten part. |
894 | for (T *J = I; NumOverwritten > 0; --NumOverwritten) { |
895 | *J = *From; |
896 | ++J; ++From; |
897 | } |
898 | |
899 | // Insert the non-overwritten middle part. |
900 | this->uninitialized_copy(From, To, OldEnd); |
901 | return I; |
902 | } |
903 | |
904 | void insert(iterator I, std::initializer_list<T> IL) { |
905 | insert(I, IL.begin(), IL.end()); |
906 | } |
907 | |
908 | template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) { |
909 | if (LLVM_UNLIKELY(this->size() >= this->capacity())) |
910 | return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...); |
911 | |
912 | ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...); |
913 | this->set_size(this->size() + 1); |
914 | return this->back(); |
915 | } |
916 | |
917 | SmallVectorImpl &operator=(const SmallVectorImpl &RHS); |
918 | |
919 | SmallVectorImpl &operator=(SmallVectorImpl &&RHS); |
920 | |
921 | bool operator==(const SmallVectorImpl &RHS) const { |
922 | if (this->size() != RHS.size()) return false; |
923 | return std::equal(this->begin(), this->end(), RHS.begin()); |
924 | } |
925 | bool operator!=(const SmallVectorImpl &RHS) const { |
926 | return !(*this == RHS); |
927 | } |
928 | |
929 | bool operator<(const SmallVectorImpl &RHS) const { |
930 | return std::lexicographical_compare(this->begin(), this->end(), |
931 | RHS.begin(), RHS.end()); |
932 | } |
933 | }; |
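// Usage sketch: because SmallVectorImpl<T> erases the inline element count, it
// is the conventional parameter type for code that should accept any
// SmallVector<T, N> by reference without copying. The callee name below is
// purely illustrative.
//
//   void collectNames(llvm::SmallVectorImpl<llvm::StringRef> &Out);
//
//   llvm::SmallVector<llvm::StringRef, 8> Names;
//   collectNames(Names);   // any inline size N works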
934 | |
935 | template <typename T> |
936 | void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) { |
937 | if (this == &RHS) return; |
938 | |
939 | // We can only avoid copying elements if neither vector is small. |
940 | if (!this->isSmall() && !RHS.isSmall()) { |
941 | std::swap(this->BeginX, RHS.BeginX); |
942 | std::swap(this->Size, RHS.Size); |
943 | std::swap(this->Capacity, RHS.Capacity); |
944 | return; |
945 | } |
946 | this->reserve(RHS.size()); |
947 | RHS.reserve(this->size()); |
948 | |
949 | // Swap the shared elements. |
950 | size_t NumShared = this->size(); |
951 | if (NumShared > RHS.size()) NumShared = RHS.size(); |
952 | for (size_type i = 0; i != NumShared; ++i) |
953 | std::swap((*this)[i], RHS[i]); |
954 | |
955 | // Copy over the extra elts. |
956 | if (this->size() > RHS.size()) { |
957 | size_t EltDiff = this->size() - RHS.size(); |
958 | this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); |
959 | RHS.set_size(RHS.size() + EltDiff); |
960 | this->destroy_range(this->begin()+NumShared, this->end()); |
961 | this->set_size(NumShared); |
962 | } else if (RHS.size() > this->size()) { |
963 | size_t EltDiff = RHS.size() - this->size(); |
964 | this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); |
965 | this->set_size(this->size() + EltDiff); |
966 | this->destroy_range(RHS.begin()+NumShared, RHS.end()); |
967 | RHS.set_size(NumShared); |
968 | } |
969 | } |
970 | |
971 | template <typename T> |
972 | SmallVectorImpl<T> &SmallVectorImpl<T>:: |
973 | operator=(const SmallVectorImpl<T> &RHS) { |
974 | // Avoid self-assignment. |
975 | if (this == &RHS) return *this; |
976 | |
977 | // If we already have sufficient space, assign the common elements, then |
978 | // destroy any excess. |
979 | size_t RHSSize = RHS.size(); |
980 | size_t CurSize = this->size(); |
981 | if (CurSize >= RHSSize) { |
982 | // Assign common elements. |
983 | iterator NewEnd; |
984 | if (RHSSize) |
985 | NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin()); |
986 | else |
987 | NewEnd = this->begin(); |
988 | |
989 | // Destroy excess elements. |
990 | this->destroy_range(NewEnd, this->end()); |
991 | |
992 | // Trim. |
993 | this->set_size(RHSSize); |
994 | return *this; |
995 | } |
996 | |
997 | // If we have to grow to have enough elements, destroy the current elements. |
998 | // This allows us to avoid copying them during the grow. |
999 | // FIXME: don't do this if they're efficiently moveable. |
1000 | if (this->capacity() < RHSSize) { |
1001 | // Destroy current elements. |
1002 | this->clear(); |
1003 | CurSize = 0; |
1004 | this->grow(RHSSize); |
1005 | } else if (CurSize) { |
1006 | // Otherwise, use assignment for the already-constructed elements. |
1007 | std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin()); |
1008 | } |
1009 | |
1010 | // Copy construct the new elements in place. |
1011 | this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(), |
1012 | this->begin()+CurSize); |
1013 | |
1014 | // Set end. |
1015 | this->set_size(RHSSize); |
1016 | return *this; |
1017 | } |
1018 | |
1019 | template <typename T> |
1020 | SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) { |
1021 | // Avoid self-assignment. |
1022 | if (this == &RHS) return *this; |
1023 | |
1024 | // If the RHS isn't small, clear this vector and then steal its buffer. |
1025 | if (!RHS.isSmall()) { |
1026 | this->destroy_range(this->begin(), this->end()); |
1027 | if (!this->isSmall()) free(this->begin()); |
1028 | this->BeginX = RHS.BeginX; |
1029 | this->Size = RHS.Size; |
1030 | this->Capacity = RHS.Capacity; |
1031 | RHS.resetToSmall(); |
1032 | return *this; |
1033 | } |
1034 | |
1035 | // If we already have sufficient space, assign the common elements, then |
1036 | // destroy any excess. |
1037 | size_t RHSSize = RHS.size(); |
1038 | size_t CurSize = this->size(); |
1039 | if (CurSize >= RHSSize) { |
1040 | // Assign common elements. |
1041 | iterator NewEnd = this->begin(); |
1042 | if (RHSSize) |
1043 | NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd); |
1044 | |
1045 | // Destroy excess elements and trim the bounds. |
1046 | this->destroy_range(NewEnd, this->end()); |
1047 | this->set_size(RHSSize); |
1048 | |
1049 | // Clear the RHS. |
1050 | RHS.clear(); |
1051 | |
1052 | return *this; |
1053 | } |
1054 | |
1055 | // If we have to grow to have enough elements, destroy the current elements. |
1056 | // This allows us to avoid copying them during the grow. |
1057 | // FIXME: this may not actually make any sense if we can efficiently move |
1058 | // elements. |
1059 | if (this->capacity() < RHSSize) { |
1060 | // Destroy current elements. |
1061 | this->clear(); |
1062 | CurSize = 0; |
1063 | this->grow(RHSSize); |
1064 | } else if (CurSize) { |
1065 | // Otherwise, use assignment for the already-constructed elements. |
1066 | std::move(RHS.begin(), RHS.begin()+CurSize, this->begin()); |
1067 | } |
1068 | |
1069 | // Move-construct the new elements in place. |
1070 | this->uninitialized_move(RHS.begin()+CurSize, RHS.end(), |
1071 | this->begin()+CurSize); |
1072 | |
1073 | // Set end. |
1074 | this->set_size(RHSSize); |
1075 | |
1076 | RHS.clear(); |
1077 | return *this; |
1078 | } |
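// Illustrative sketch (editorial addition, not part of the original header):
// move-assignment steals the heap buffer when the source has spilled out of
// its inline storage, and falls back to element-wise moves otherwise.
// `moveInto` is a hypothetical helper used only for illustration.
static inline void moveInto(SmallVectorImpl<int> &Dst,
                            SmallVectorImpl<int> &Src) {
  Dst = std::move(Src); // Heap-allocated Src: constant-time pointer steal and
                        // Src is reset to its inline storage. Small Src:
                        // per-element move, then Src is cleared.
}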
1079 | |
1080 | /// Storage for the SmallVector elements. This is specialized for the N=0 case |
1081 | /// to avoid allocating unnecessary storage. |
1082 | template <typename T, unsigned N> |
1083 | struct SmallVectorStorage { |
1084 | alignas(T) char InlineElts[N * sizeof(T)]; |
1085 | }; |
1086 | |
1087 | /// We need the storage to be properly aligned even for small-size of 0 so that |
1088 | /// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is |
1089 | /// well-defined. |
1090 | template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {}; |
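// Illustrative check (editorial addition, not part of the original header):
// even the zero-element specialization keeps the element type's alignment, so
// the pointer math in getFirstEl() stays well-defined.
static_assert(alignof(SmallVectorStorage<long long, 0>) == alignof(long long),
              "zero-size storage still carries the element alignment");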
1091 | |
1092 | /// Forward declaration of SmallVector so that |
1093 | /// calculateSmallVectorDefaultInlinedElements can reference |
1094 | /// `sizeof(SmallVector<T, 0>)`. |
1095 | template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector; |
1096 | |
1097 | /// Helper class for calculating the default number of inline elements for |
1098 | /// `SmallVector<T>`. |
1099 | /// |
1100 | /// This should be migrated to a constexpr function when our minimum |
1101 | /// compiler support is enough for multi-statement constexpr functions. |
1102 | template <typename T> struct CalculateSmallVectorDefaultInlinedElements { |
1103 | // Parameter controlling the default number of inlined elements |
1104 | // for `SmallVector<T>`. |
1105 | // |
1106 | // The default number of inlined elements ensures that |
1107 | // 1. There is at least one inlined element. |
1108 | // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless |
1109 | // it contradicts 1. |
1110 | static constexpr size_t kPreferredSmallVectorSizeof = 64; |
1111 | |
1112 | // static_assert that sizeof(T) is not "too big". |
1113 | // |
1114 | // Because our policy guarantees at least one inlined element, it is possible |
1115 | // for an arbitrarily large inlined element to allocate an arbitrarily large |
1116 | // amount of inline storage. We generally consider it an antipattern for a |
1117 | // SmallVector to allocate an excessive amount of inline storage, so we want |
1118 | // to call attention to these cases and make sure that users are making an |
1119 | // intentional decision if they request a lot of inline storage. |
1120 | // |
1121 | // We want this assertion to trigger in pathological cases, but otherwise |
1122 | // not be too easy to hit. To accomplish that, the cutoff is actually somewhat |
1123 | // larger than kPreferredSmallVectorSizeof (otherwise, |
1124 | // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that |
1125 | // pattern seems useful in practice). |
1126 | // |
1127 | // One wrinkle is that this assertion is in theory non-portable, since |
1128 | // sizeof(T) is in general platform-dependent. However, we don't expect this |
1129 | // to be much of an issue, because most LLVM development happens on 64-bit |
1130 | // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for |
1131 | // 32-bit hosts, dodging the issue. The reverse situation, where development |
1132 | // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a |
1133 | // 64-bit host, is expected to be very rare. |
1134 | static_assert( |
1135 | sizeof(T) <= 256, |
1136 | "You are trying to use a default number of inlined elements for " |
1137 | "`SmallVector<T>` but `sizeof(T)` is really big! Please use an " |
1138 | "explicit number of inlined elements with `SmallVector<T, N>` to make " |
1139 | "sure you really want that much inline storage."); |
1140 | |
1141 | // Discount the size of the header itself when calculating the maximum inline |
1142 | // bytes. |
1143 | static constexpr size_t PreferredInlineBytes = |
1144 | kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>); |
1145 | static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T); |
1146 | static constexpr size_t value = |
1147 | NumElementsThatFit == 0 ? 1 : NumElementsThatFit; |
1148 | }; |
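// Worked example (editorial addition; the concrete numbers assume a typical
// 64-bit host where sizeof(SmallVector<int, 0>) is 16 bytes):
//   PreferredInlineBytes = 64 - 16 = 48
//   NumElementsThatFit   = 48 / sizeof(int) = 12
//   value                = 12, so SmallVector<int> keeps 12 ints in-place.
// For an element type so large that NumElementsThatFit would be 0, value is
// clamped to 1, guaranteeing at least one inline element.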
1149 | |
1150 | /// This is a 'vector' (really, a variable-sized array), optimized |
1151 | /// for the case when the array is small. It contains some number of elements |
1152 | /// in-place, which allows it to avoid heap allocation when the actual number of |
1153 | /// elements is below that threshold. This allows normal "small" cases to be |
1154 | /// fast without losing generality for large inputs. |
1155 | /// |
1156 | /// \note |
1157 | /// In the absence of a well-motivated choice for the number of inlined |
1158 | /// elements \p N, it is recommended to use \c SmallVector<T> (that is, |
1159 | /// omitting the \p N). This will choose a default number of inlined elements |
1160 | /// reasonable for allocation on the stack (for example, trying to keep \c |
1161 | /// sizeof(SmallVector<T>) around 64 bytes). |
1162 | /// |
1163 | /// \warning This does not attempt to be exception safe. |
1164 | /// |
1165 | /// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h |
1166 | template <typename T, |
1167 | unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value> |
1168 | class LLVM_GSL_OWNER SmallVector : public SmallVectorImpl<T>, |
1169 | SmallVectorStorage<T, N> { |
1170 | public: |
1171 | SmallVector() : SmallVectorImpl<T>(N) {} |
1172 | |
1173 | ~SmallVector() { |
1174 | // Destroy the constructed elements in the vector. |
1175 | this->destroy_range(this->begin(), this->end()); |
1176 | } |
1177 | |
1178 | explicit SmallVector(size_t Size, const T &Value = T()) |
1179 | : SmallVectorImpl<T>(N) { |
1180 | this->assign(Size, Value); |
1181 | } |
1182 | |
1183 | template <typename ItTy, |
1184 | typename = std::enable_if_t<std::is_convertible< |
1185 | typename std::iterator_traits<ItTy>::iterator_category, |
1186 | std::input_iterator_tag>::value>> |
1187 | SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) { |
1188 | this->append(S, E); |
1189 | } |
1190 | |
1191 | template <typename RangeTy> |
1192 | explicit SmallVector(const iterator_range<RangeTy> &R) |
1193 | : SmallVectorImpl<T>(N) { |
1194 | this->append(R.begin(), R.end()); |
1195 | } |
1196 | |
1197 | SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) { |
1198 | this->assign(IL); |
1199 | } |
1200 | |
1201 | SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) { |
1202 | if (!RHS.empty()) |
1203 | SmallVectorImpl<T>::operator=(RHS); |
1204 | } |
1205 | |
1206 | SmallVector &operator=(const SmallVector &RHS) { |
1207 | SmallVectorImpl<T>::operator=(RHS); |
1208 | return *this; |
1209 | } |
1210 | |
1211 | SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) { |
1212 | if (!RHS.empty()) |
1213 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1214 | } |
1215 | |
1216 | SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) { |
1217 | if (!RHS.empty()) |
1218 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1219 | } |
1220 | |
1221 | SmallVector &operator=(SmallVector &&RHS) { |
1222 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1223 | return *this; |
1224 | } |
1225 | |
1226 | SmallVector &operator=(SmallVectorImpl<T> &&RHS) { |
1227 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1228 | return *this; |
1229 | } |
1230 | |
1231 | SmallVector &operator=(std::initializer_list<T> IL) { |
1232 | this->assign(IL); |
1233 | return *this; |
1234 | } |
1235 | }; |
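// Usage sketch (editorial addition, not part of the original header): with no
// explicit N, the default keeps sizeof(SmallVector<T>) near 64 bytes, as the
// class comment above recommends. `collectSquares` is a hypothetical helper
// used only for illustration.
static inline SmallVector<unsigned> collectSquares(unsigned Count) {
  SmallVector<unsigned> Result;   // default inline capacity, no heap yet
  for (unsigned I = 0; I != Count; ++I)
    Result.push_back(I * I);      // spills to the heap only once the inline
                                  // storage is exhausted
  return Result;
}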
1236 | |
1237 | template <typename T, unsigned N> |
1238 | inline size_t capacity_in_bytes(const SmallVector<T, N> &X) { |
1239 | return X.capacity_in_bytes(); |
1240 | } |
1241 | |
1242 | /// Given a range of type R, iterate the entire range and return a |
1243 | /// SmallVector with elements of the vector. This is useful, for example, |
1244 | /// when you want to iterate a range and then sort the results. |
1245 | template <unsigned Size, typename R> |
1246 | SmallVector<typename std::remove_const<typename std::remove_reference< |
1247 | decltype(*std::begin(std::declval<R &>()))>::type>::type, |
1248 | Size> |
1249 | to_vector(R &&Range) { |
1250 | return {std::begin(Range), std::end(Range)}; |
1251 | } |
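// Usage sketch (editorial addition, not part of the original header):
// materialize an arbitrary range so it can be sorted or otherwise mutated,
// as the comment above suggests. `sortedCopy` is a hypothetical helper used
// only for illustration; it assumes the range's elements are `<`-comparable.
template <typename R> static auto sortedCopy(R &&Range) {
  auto Vec = to_vector<8>(std::forward<R>(Range)); // copy the elements into a
                                                   // SmallVector with 8 inline
                                                   // elements
  std::sort(Vec.begin(), Vec.end());
  return Vec;
}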
1252 | |
1253 | } // end namespace llvm |
1254 | |
1255 | namespace std { |
1256 | |
1257 | /// Implement std::swap in terms of SmallVector swap. |
1258 | template<typename T> |
1259 | inline void |
1260 | swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) { |
1261 | LHS.swap(RHS); |
1262 | } |
1263 | |
1264 | /// Implement std::swap in terms of SmallVector swap. |
1265 | template<typename T, unsigned N> |
1266 | inline void |
1267 | swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) { |
1268 | LHS.swap(RHS); |
1269 | } |
1270 | |
1271 | } // end namespace std |
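// Usage sketch (editorial addition, not part of the original header): generic
// code that swaps through `using std::swap` picks up the SmallVector-aware
// overloads above rather than the generic triple-move. `swapExample` is a
// hypothetical helper used only for illustration.
static inline void swapExample(llvm::SmallVector<int, 4> &A,
                               llvm::SmallVector<int, 4> &B) {
  using std::swap;
  swap(A, B); // Resolves to the overload above: heap buffers are exchanged by
              // pointer when both vectors have spilled; otherwise elements are
              // moved individually.
}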
1272 | |
1273 | #endif // LLVM_ADT_SMALLVECTOR_H |