File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp |
Warning: | line 1606, column 34 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This pass performs various transformations related to eliminating memcpy | |||
10 | // calls, or transforming sets of stores into memset's. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" | |||
15 | #include "llvm/ADT/DenseSet.h" | |||
16 | #include "llvm/ADT/None.h" | |||
17 | #include "llvm/ADT/STLExtras.h" | |||
18 | #include "llvm/ADT/SmallVector.h" | |||
19 | #include "llvm/ADT/Statistic.h" | |||
20 | #include "llvm/ADT/iterator_range.h" | |||
21 | #include "llvm/Analysis/AliasAnalysis.h" | |||
22 | #include "llvm/Analysis/AssumptionCache.h" | |||
23 | #include "llvm/Analysis/GlobalsModRef.h" | |||
24 | #include "llvm/Analysis/Loads.h" | |||
25 | #include "llvm/Analysis/MemoryDependenceAnalysis.h" | |||
26 | #include "llvm/Analysis/MemoryLocation.h" | |||
27 | #include "llvm/Analysis/MemorySSA.h" | |||
28 | #include "llvm/Analysis/MemorySSAUpdater.h" | |||
29 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
30 | #include "llvm/Analysis/ValueTracking.h" | |||
31 | #include "llvm/IR/Argument.h" | |||
32 | #include "llvm/IR/BasicBlock.h" | |||
33 | #include "llvm/IR/Constants.h" | |||
34 | #include "llvm/IR/DataLayout.h" | |||
35 | #include "llvm/IR/DerivedTypes.h" | |||
36 | #include "llvm/IR/Dominators.h" | |||
37 | #include "llvm/IR/Function.h" | |||
38 | #include "llvm/IR/GetElementPtrTypeIterator.h" | |||
39 | #include "llvm/IR/GlobalVariable.h" | |||
40 | #include "llvm/IR/IRBuilder.h" | |||
41 | #include "llvm/IR/InstrTypes.h" | |||
42 | #include "llvm/IR/Instruction.h" | |||
43 | #include "llvm/IR/Instructions.h" | |||
44 | #include "llvm/IR/IntrinsicInst.h" | |||
45 | #include "llvm/IR/Intrinsics.h" | |||
46 | #include "llvm/IR/LLVMContext.h" | |||
47 | #include "llvm/IR/Module.h" | |||
48 | #include "llvm/IR/Operator.h" | |||
49 | #include "llvm/IR/PassManager.h" | |||
50 | #include "llvm/IR/Type.h" | |||
51 | #include "llvm/IR/User.h" | |||
52 | #include "llvm/IR/Value.h" | |||
53 | #include "llvm/InitializePasses.h" | |||
54 | #include "llvm/Pass.h" | |||
55 | #include "llvm/Support/Casting.h" | |||
56 | #include "llvm/Support/Debug.h" | |||
57 | #include "llvm/Support/MathExtras.h" | |||
58 | #include "llvm/Support/raw_ostream.h" | |||
59 | #include "llvm/Transforms/Scalar.h" | |||
60 | #include "llvm/Transforms/Utils/Local.h" | |||
61 | #include <algorithm> | |||
62 | #include <cassert> | |||
63 | #include <cstdint> | |||
64 | #include <utility> | |||
65 | ||||
66 | using namespace llvm; | |||
67 | ||||
68 | #define DEBUG_TYPE"memcpyopt" "memcpyopt" | |||
69 | ||||
70 | static cl::opt<bool> | |||
71 | EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden, | |||
72 | cl::desc("Use MemorySSA-backed MemCpyOpt.")); | |||
73 | ||||
74 | STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted")static llvm::Statistic NumMemCpyInstr = {"memcpyopt", "NumMemCpyInstr" , "Number of memcpy instructions deleted"}; | |||
75 | STATISTIC(NumMemSetInfer, "Number of memsets inferred")static llvm::Statistic NumMemSetInfer = {"memcpyopt", "NumMemSetInfer" , "Number of memsets inferred"}; | |||
76 | STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy")static llvm::Statistic NumMoveToCpy = {"memcpyopt", "NumMoveToCpy" , "Number of memmoves converted to memcpy"}; | |||
77 | STATISTIC(NumCpyToSet, "Number of memcpys converted to memset")static llvm::Statistic NumCpyToSet = {"memcpyopt", "NumCpyToSet" , "Number of memcpys converted to memset"}; | |||
78 | STATISTIC(NumCallSlot, "Number of call slot optimizations performed")static llvm::Statistic NumCallSlot = {"memcpyopt", "NumCallSlot" , "Number of call slot optimizations performed"}; | |||
79 | ||||
80 | namespace { | |||
81 | ||||
82 | /// Represents a range of memset'd bytes with the ByteVal value. | |||
83 | /// This allows us to analyze stores like: | |||
84 | /// store 0 -> P+1 | |||
85 | /// store 0 -> P+0 | |||
86 | /// store 0 -> P+3 | |||
87 | /// store 0 -> P+2 | |||
88 | /// which sometimes happens with stores to arrays of structs etc. When we see | |||
89 | /// the first store, we make a range [1, 2). The second store extends the range | |||
90 | /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the | |||
91 | /// two ranges into [0, 3) which is memset'able. | |||
92 | struct MemsetRange { | |||
93 | // Start/End - A semi range that describes the span that this range covers. | |||
94 | // The range is closed at the start and open at the end: [Start, End). | |||
95 | int64_t Start, End; | |||
96 | ||||
97 | /// StartPtr - The getelementptr instruction that points to the start of the | |||
98 | /// range. | |||
99 | Value *StartPtr; | |||
100 | ||||
101 | /// Alignment - The known alignment of the first store. | |||
102 | unsigned Alignment; | |||
103 | ||||
104 | /// TheStores - The actual stores that make up this range. | |||
105 | SmallVector<Instruction*, 16> TheStores; | |||
106 | ||||
107 | bool isProfitableToUseMemset(const DataLayout &DL) const; | |||
108 | }; | |||
109 | ||||
110 | } // end anonymous namespace | |||
111 | ||||
112 | bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { | |||
113 | // If we found more than 4 stores to merge or 16 bytes, use memset. | |||
114 | if (TheStores.size() >= 4 || End-Start >= 16) return true; | |||
115 | ||||
116 | // If there is nothing to merge, don't do anything. | |||
117 | if (TheStores.size() < 2) return false; | |||
118 | ||||
119 | // If any of the stores are a memset, then it is always good to extend the | |||
120 | // memset. | |||
121 | for (Instruction *SI : TheStores) | |||
122 | if (!isa<StoreInst>(SI)) | |||
123 | return true; | |||
124 | ||||
125 | // Assume that the code generator is capable of merging pairs of stores | |||
126 | // together if it wants to. | |||
127 | if (TheStores.size() == 2) return false; | |||
128 | ||||
129 | // If we have fewer than 8 stores, it can still be worthwhile to do this. | |||
130 | // For example, merging 4 i8 stores into an i32 store is useful almost always. | |||
131 | // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the | |||
132 | // memset will be split into 2 32-bit stores anyway) and doing so can | |||
133 | // pessimize the llvm optimizer. | |||
134 | // | |||
135 | // Since we don't have perfect knowledge here, make some assumptions: assume | |||
136 | // the maximum GPR width is the same size as the largest legal integer | |||
137 | // size. If so, check to see whether we will end up actually reducing the | |||
138 | // number of stores used. | |||
139 | unsigned Bytes = unsigned(End-Start); | |||
140 | unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8; | |||
141 | if (MaxIntSize == 0) | |||
142 | MaxIntSize = 1; | |||
143 | unsigned NumPointerStores = Bytes / MaxIntSize; | |||
144 | ||||
145 | // Assume the remaining bytes if any are done a byte at a time. | |||
146 | unsigned NumByteStores = Bytes % MaxIntSize; | |||
147 | ||||
148 | // If we will reduce the # stores (according to this heuristic), do the | |||
149 | // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 | |||
150 | // etc. | |||
151 | return TheStores.size() > NumPointerStores+NumByteStores; | |||
152 | } | |||
153 | ||||
154 | namespace { | |||
155 | ||||
156 | class MemsetRanges { | |||
157 | using range_iterator = SmallVectorImpl<MemsetRange>::iterator; | |||
158 | ||||
159 | /// A sorted list of the memset ranges. | |||
160 | SmallVector<MemsetRange, 8> Ranges; | |||
161 | ||||
162 | const DataLayout &DL; | |||
163 | ||||
164 | public: | |||
165 | MemsetRanges(const DataLayout &DL) : DL(DL) {} | |||
166 | ||||
167 | using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator; | |||
168 | ||||
169 | const_iterator begin() const { return Ranges.begin(); } | |||
170 | const_iterator end() const { return Ranges.end(); } | |||
171 | bool empty() const { return Ranges.empty(); } | |||
172 | ||||
173 | void addInst(int64_t OffsetFromFirst, Instruction *Inst) { | |||
174 | if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) | |||
175 | addStore(OffsetFromFirst, SI); | |||
176 | else | |||
177 | addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); | |||
178 | } | |||
179 | ||||
180 | void addStore(int64_t OffsetFromFirst, StoreInst *SI) { | |||
181 | TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); | |||
182 | assert(!StoreSize.isScalable() && "Can't track scalable-typed stores")((void)0); | |||
183 | addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(), | |||
184 | SI->getAlign().value(), SI); | |||
185 | } | |||
186 | ||||
187 | void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { | |||
188 | int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); | |||
189 | addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI); | |||
190 | } | |||
191 | ||||
192 | void addRange(int64_t Start, int64_t Size, Value *Ptr, | |||
193 | unsigned Alignment, Instruction *Inst); | |||
194 | }; | |||
195 | ||||
196 | } // end anonymous namespace | |||
197 | ||||
198 | /// Add a new store to the MemsetRanges data structure. This adds a | |||
199 | /// new range for the specified store at the specified offset, merging into | |||
200 | /// existing ranges as appropriate. | |||
201 | void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, | |||
202 | unsigned Alignment, Instruction *Inst) { | |||
203 | int64_t End = Start+Size; | |||
204 | ||||
205 | range_iterator I = partition_point( | |||
206 | Ranges, [=](const MemsetRange &O) { return O.End < Start; }); | |||
207 | ||||
208 | // We now know that I == E, in which case we didn't find anything to merge | |||
209 | // with, or that Start <= I->End. If End < I->Start or I == E, then we need | |||
210 | // to insert a new range. Handle this now. | |||
211 | if (I == Ranges.end() || End < I->Start) { | |||
212 | MemsetRange &R = *Ranges.insert(I, MemsetRange()); | |||
213 | R.Start = Start; | |||
214 | R.End = End; | |||
215 | R.StartPtr = Ptr; | |||
216 | R.Alignment = Alignment; | |||
217 | R.TheStores.push_back(Inst); | |||
218 | return; | |||
219 | } | |||
220 | ||||
221 | // This store overlaps with I, add it. | |||
222 | I->TheStores.push_back(Inst); | |||
223 | ||||
224 | // At this point, we may have an interval that completely contains our store. | |||
225 | // If so, just add it to the interval and return. | |||
226 | if (I->Start <= Start && I->End >= End) | |||
227 | return; | |||
228 | ||||
229 | // Now we know that Start <= I->End and End >= I->Start so the range overlaps | |||
230 | // but is not entirely contained within the range. | |||
231 | ||||
232 | // See if the range extends the start of the range. In this case, it couldn't | |||
233 | // possibly cause it to join the prior range, because otherwise we would have | |||
234 | // stopped on *it*. | |||
235 | if (Start < I->Start) { | |||
236 | I->Start = Start; | |||
237 | I->StartPtr = Ptr; | |||
238 | I->Alignment = Alignment; | |||
239 | } | |||
240 | ||||
241 | // Now we know that Start <= I->End and Start >= I->Start (so the startpoint | |||
242 | // is in or right at the end of I), and that End >= I->Start. Extend I out to | |||
243 | // End. | |||
244 | if (End > I->End) { | |||
245 | I->End = End; | |||
246 | range_iterator NextI = I; | |||
247 | while (++NextI != Ranges.end() && End >= NextI->Start) { | |||
248 | // Merge the range in. | |||
249 | I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); | |||
250 | if (NextI->End > I->End) | |||
251 | I->End = NextI->End; | |||
252 | Ranges.erase(NextI); | |||
253 | NextI = I; | |||
254 | } | |||
255 | } | |||
256 | } | |||
257 | ||||
258 | //===----------------------------------------------------------------------===// | |||
259 | // MemCpyOptLegacyPass Pass | |||
260 | //===----------------------------------------------------------------------===// | |||
261 | ||||
262 | namespace { | |||
263 | ||||
264 | class MemCpyOptLegacyPass : public FunctionPass { | |||
265 | MemCpyOptPass Impl; | |||
266 | ||||
267 | public: | |||
268 | static char ID; // Pass identification, replacement for typeid | |||
269 | ||||
270 | MemCpyOptLegacyPass() : FunctionPass(ID) { | |||
271 | initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry()); | |||
272 | } | |||
273 | ||||
274 | bool runOnFunction(Function &F) override; | |||
275 | ||||
276 | private: | |||
277 | // This transformation requires dominator postdominator info | |||
278 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
279 | AU.setPreservesCFG(); | |||
280 | AU.addRequired<AssumptionCacheTracker>(); | |||
281 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
282 | AU.addPreserved<DominatorTreeWrapperPass>(); | |||
283 | AU.addPreserved<GlobalsAAWrapperPass>(); | |||
284 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
285 | if (!EnableMemorySSA) | |||
286 | AU.addRequired<MemoryDependenceWrapperPass>(); | |||
287 | AU.addPreserved<MemoryDependenceWrapperPass>(); | |||
288 | AU.addRequired<AAResultsWrapperPass>(); | |||
289 | AU.addPreserved<AAResultsWrapperPass>(); | |||
290 | if (EnableMemorySSA) | |||
291 | AU.addRequired<MemorySSAWrapperPass>(); | |||
292 | AU.addPreserved<MemorySSAWrapperPass>(); | |||
293 | } | |||
294 | }; | |||
295 | ||||
296 | } // end anonymous namespace | |||
297 | ||||
298 | char MemCpyOptLegacyPass::ID = 0; | |||
299 | ||||
300 | /// The public interface to this file... | |||
301 | FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); } | |||
302 | ||||
303 | INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",static void *initializeMemCpyOptLegacyPassPassOnce(PassRegistry &Registry) { | |||
304 | false, false)static void *initializeMemCpyOptLegacyPassPassOnce(PassRegistry &Registry) { | |||
305 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry); | |||
306 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | |||
307 | INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)initializeMemoryDependenceWrapperPassPass(Registry); | |||
308 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
309 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | |||
310 | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)initializeGlobalsAAWrapperPassPass(Registry); | |||
311 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry); | |||
312 | INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",PassInfo *PI = new PassInfo( "MemCpy Optimization", "memcpyopt" , &MemCpyOptLegacyPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <MemCpyOptLegacyPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeMemCpyOptLegacyPassPassFlag ; void llvm::initializeMemCpyOptLegacyPassPass(PassRegistry & Registry) { llvm::call_once(InitializeMemCpyOptLegacyPassPassFlag , initializeMemCpyOptLegacyPassPassOnce, std::ref(Registry)); } | |||
313 | false, false)PassInfo *PI = new PassInfo( "MemCpy Optimization", "memcpyopt" , &MemCpyOptLegacyPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <MemCpyOptLegacyPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeMemCpyOptLegacyPassPassFlag ; void llvm::initializeMemCpyOptLegacyPassPass(PassRegistry & Registry) { llvm::call_once(InitializeMemCpyOptLegacyPassPassFlag , initializeMemCpyOptLegacyPassPassOnce, std::ref(Registry)); } | |||
314 | ||||
315 | // Check that V is either not accessible by the caller, or unwinding cannot | |||
316 | // occur between Start and End. | |||
317 | static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, | |||
318 | Instruction *End) { | |||
319 | assert(Start->getParent() == End->getParent() && "Must be in same block")((void)0); | |||
320 | if (!Start->getFunction()->doesNotThrow() && | |||
321 | !isa<AllocaInst>(getUnderlyingObject(V))) { | |||
322 | for (const Instruction &I : | |||
323 | make_range(Start->getIterator(), End->getIterator())) { | |||
324 | if (I.mayThrow()) | |||
325 | return true; | |||
326 | } | |||
327 | } | |||
328 | return false; | |||
329 | } | |||
330 | ||||
331 | void MemCpyOptPass::eraseInstruction(Instruction *I) { | |||
332 | if (MSSAU) | |||
333 | MSSAU->removeMemoryAccess(I); | |||
334 | if (MD) | |||
335 | MD->removeInstruction(I); | |||
336 | I->eraseFromParent(); | |||
337 | } | |||
338 | ||||
339 | // Check for mod or ref of Loc between Start and End, excluding both boundaries. | |||
340 | // Start and End must be in the same block | |||
341 | static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, | |||
342 | const MemoryUseOrDef *Start, | |||
343 | const MemoryUseOrDef *End) { | |||
344 | assert(Start->getBlock() == End->getBlock() && "Only local supported")((void)0); | |||
345 | for (const MemoryAccess &MA : | |||
346 | make_range(++Start->getIterator(), End->getIterator())) { | |||
347 | if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(), | |||
348 | Loc))) | |||
349 | return true; | |||
350 | } | |||
351 | return false; | |||
352 | } | |||
353 | ||||
354 | // Check for mod of Loc between Start and End, excluding both boundaries. | |||
355 | // Start and End can be in different blocks. | |||
356 | static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, | |||
357 | const MemoryUseOrDef *Start, | |||
358 | const MemoryUseOrDef *End) { | |||
359 | // TODO: Only walk until we hit Start. | |||
360 | MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( | |||
361 | End->getDefiningAccess(), Loc); | |||
362 | return !MSSA->dominates(Clobber, Start); | |||
363 | } | |||
364 | ||||
365 | /// When scanning forward over instructions, we look for some other patterns to | |||
366 | /// fold away. In particular, this looks for stores to neighboring locations of | |||
367 | /// memory. If it sees enough consecutive ones, it attempts to merge them | |||
368 | /// together into a memcpy/memset. | |||
369 | Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, | |||
370 | Value *StartPtr, | |||
371 | Value *ByteVal) { | |||
372 | const DataLayout &DL = StartInst->getModule()->getDataLayout(); | |||
373 | ||||
374 | // We can't track scalable types | |||
375 | if (StoreInst *SI = dyn_cast<StoreInst>(StartInst)) | |||
376 | if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable()) | |||
377 | return nullptr; | |||
378 | ||||
379 | // Okay, so we now have a single store that can be splatable. Scan to find | |||
380 | // all subsequent stores of the same value to offset from the same pointer. | |||
381 | // Join these together into ranges, so we can decide whether contiguous blocks | |||
382 | // are stored. | |||
383 | MemsetRanges Ranges(DL); | |||
384 | ||||
385 | BasicBlock::iterator BI(StartInst); | |||
386 | ||||
387 | // Keeps track of the last memory use or def before the insertion point for | |||
388 | // the new memset. The new MemoryDef for the inserted memsets will be inserted | |||
389 | // after MemInsertPoint. It points to either LastMemDef or to the last user | |||
390 | // before the insertion point of the memset, if there are any such users. | |||
391 | MemoryUseOrDef *MemInsertPoint = nullptr; | |||
392 | // Keeps track of the last MemoryDef between StartInst and the insertion point | |||
393 | // for the new memset. This will become the defining access of the inserted | |||
394 | // memsets. | |||
395 | MemoryDef *LastMemDef = nullptr; | |||
396 | for (++BI; !BI->isTerminator(); ++BI) { | |||
397 | if (MSSAU) { | |||
398 | auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( | |||
399 | MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); | |||
400 | if (CurrentAcc) { | |||
401 | MemInsertPoint = CurrentAcc; | |||
402 | if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc)) | |||
403 | LastMemDef = CurrentDef; | |||
404 | } | |||
405 | } | |||
406 | ||||
407 | // Calls that only access inaccessible memory do not block merging | |||
408 | // accessible stores. | |||
409 | if (auto *CB = dyn_cast<CallBase>(BI)) { | |||
410 | if (CB->onlyAccessesInaccessibleMemory()) | |||
411 | continue; | |||
412 | } | |||
413 | ||||
414 | if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { | |||
415 | // If the instruction is readnone, ignore it, otherwise bail out. We | |||
416 | // don't even allow readonly here because we don't want something like: | |||
417 | // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). | |||
418 | if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) | |||
419 | break; | |||
420 | continue; | |||
421 | } | |||
422 | ||||
423 | if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { | |||
424 | // If this is a store, see if we can merge it in. | |||
425 | if (!NextStore->isSimple()) break; | |||
426 | ||||
427 | Value *StoredVal = NextStore->getValueOperand(); | |||
428 | ||||
429 | // Don't convert stores of non-integral pointer types to memsets (which | |||
430 | // stores integers). | |||
431 | if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) | |||
432 | break; | |||
433 | ||||
434 | // We can't track ranges involving scalable types. | |||
435 | if (DL.getTypeStoreSize(StoredVal->getType()).isScalable()) | |||
436 | break; | |||
437 | ||||
438 | // Check to see if this stored value is of the same byte-splattable value. | |||
439 | Value *StoredByte = isBytewiseValue(StoredVal, DL); | |||
440 | if (isa<UndefValue>(ByteVal) && StoredByte) | |||
441 | ByteVal = StoredByte; | |||
442 | if (ByteVal != StoredByte) | |||
443 | break; | |||
444 | ||||
445 | // Check to see if this store is to a constant offset from the start ptr. | |||
446 | Optional<int64_t> Offset = | |||
447 | isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL); | |||
448 | if (!Offset) | |||
449 | break; | |||
450 | ||||
451 | Ranges.addStore(*Offset, NextStore); | |||
452 | } else { | |||
453 | MemSetInst *MSI = cast<MemSetInst>(BI); | |||
454 | ||||
455 | if (MSI->isVolatile() || ByteVal != MSI->getValue() || | |||
456 | !isa<ConstantInt>(MSI->getLength())) | |||
457 | break; | |||
458 | ||||
459 | // Check to see if this store is to a constant offset from the start ptr. | |||
460 | Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL); | |||
461 | if (!Offset) | |||
462 | break; | |||
463 | ||||
464 | Ranges.addMemSet(*Offset, MSI); | |||
465 | } | |||
466 | } | |||
467 | ||||
468 | // If we have no ranges, then we just had a single store with nothing that | |||
469 | // could be merged in. This is a very common case of course. | |||
470 | if (Ranges.empty()) | |||
471 | return nullptr; | |||
472 | ||||
473 | // If we had at least one store that could be merged in, add the starting | |||
474 | // store as well. We try to avoid this unless there is at least something | |||
475 | // interesting as a small compile-time optimization. | |||
476 | Ranges.addInst(0, StartInst); | |||
477 | ||||
478 | // If we create any memsets, we put it right before the first instruction that | |||
479 | // isn't part of the memset block. This ensure that the memset is dominated | |||
480 | // by any addressing instruction needed by the start of the block. | |||
481 | IRBuilder<> Builder(&*BI); | |||
482 | ||||
483 | // Now that we have full information about ranges, loop over the ranges and | |||
484 | // emit memset's for anything big enough to be worthwhile. | |||
485 | Instruction *AMemSet = nullptr; | |||
486 | for (const MemsetRange &Range : Ranges) { | |||
487 | if (Range.TheStores.size() == 1) continue; | |||
488 | ||||
489 | // If it is profitable to lower this range to memset, do so now. | |||
490 | if (!Range.isProfitableToUseMemset(DL)) | |||
491 | continue; | |||
492 | ||||
493 | // Otherwise, we do want to transform this! Create a new memset. | |||
494 | // Get the starting pointer of the block. | |||
495 | StartPtr = Range.StartPtr; | |||
496 | ||||
497 | AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start, | |||
498 | MaybeAlign(Range.Alignment)); | |||
499 | LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SIdo { } while (false) | |||
500 | : Range.TheStores) dbgs()do { } while (false) | |||
501 | << *SI << '\n';do { } while (false) | |||
502 | dbgs() << "With: " << *AMemSet << '\n')do { } while (false); | |||
503 | if (!Range.TheStores.empty()) | |||
504 | AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); | |||
505 | ||||
506 | if (MSSAU) { | |||
507 | assert(LastMemDef && MemInsertPoint &&((void)0) | |||
508 | "Both LastMemDef and MemInsertPoint need to be set")((void)0); | |||
509 | auto *NewDef = | |||
510 | cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI | |||
511 | ? MSSAU->createMemoryAccessBefore( | |||
512 | AMemSet, LastMemDef, MemInsertPoint) | |||
513 | : MSSAU->createMemoryAccessAfter( | |||
514 | AMemSet, LastMemDef, MemInsertPoint)); | |||
515 | MSSAU->insertDef(NewDef, /*RenameUses=*/true); | |||
516 | LastMemDef = NewDef; | |||
517 | MemInsertPoint = NewDef; | |||
518 | } | |||
519 | ||||
520 | // Zap all the stores. | |||
521 | for (Instruction *SI : Range.TheStores) | |||
522 | eraseInstruction(SI); | |||
523 | ||||
524 | ++NumMemSetInfer; | |||
525 | } | |||
526 | ||||
527 | return AMemSet; | |||
528 | } | |||
529 | ||||
530 | // This method try to lift a store instruction before position P. | |||
531 | // It will lift the store and its argument + that anything that | |||
532 | // may alias with these. | |||
533 | // The method returns true if it was successful. | |||
534 | bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { | |||
535 | // If the store alias this position, early bail out. | |||
536 | MemoryLocation StoreLoc = MemoryLocation::get(SI); | |||
537 | if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc))) | |||
538 | return false; | |||
539 | ||||
540 | // Keep track of the arguments of all instruction we plan to lift | |||
541 | // so we can make sure to lift them as well if appropriate. | |||
542 | DenseSet<Instruction*> Args; | |||
543 | if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) | |||
544 | if (Ptr->getParent() == SI->getParent()) | |||
545 | Args.insert(Ptr); | |||
546 | ||||
547 | // Instruction to lift before P. | |||
548 | SmallVector<Instruction *, 8> ToLift{SI}; | |||
549 | ||||
550 | // Memory locations of lifted instructions. | |||
551 | SmallVector<MemoryLocation, 8> MemLocs{StoreLoc}; | |||
552 | ||||
553 | // Lifted calls. | |||
554 | SmallVector<const CallBase *, 8> Calls; | |||
555 | ||||
556 | const MemoryLocation LoadLoc = MemoryLocation::get(LI); | |||
557 | ||||
558 | for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) { | |||
559 | auto *C = &*I; | |||
560 | ||||
561 | // Make sure hoisting does not perform a store that was not guaranteed to | |||
562 | // happen. | |||
563 | if (!isGuaranteedToTransferExecutionToSuccessor(C)) | |||
564 | return false; | |||
565 | ||||
566 | bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None)); | |||
567 | ||||
568 | bool NeedLift = false; | |||
569 | if (Args.erase(C)) | |||
570 | NeedLift = true; | |||
571 | else if (MayAlias) { | |||
572 | NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) { | |||
573 | return isModOrRefSet(AA->getModRefInfo(C, ML)); | |||
574 | }); | |||
575 | ||||
576 | if (!NeedLift) | |||
577 | NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) { | |||
578 | return isModOrRefSet(AA->getModRefInfo(C, Call)); | |||
579 | }); | |||
580 | } | |||
581 | ||||
582 | if (!NeedLift) | |||
583 | continue; | |||
584 | ||||
585 | if (MayAlias) { | |||
586 | // Since LI is implicitly moved downwards past the lifted instructions, | |||
587 | // none of them may modify its source. | |||
588 | if (isModSet(AA->getModRefInfo(C, LoadLoc))) | |||
589 | return false; | |||
590 | else if (const auto *Call = dyn_cast<CallBase>(C)) { | |||
591 | // If we can't lift this before P, it's game over. | |||
592 | if (isModOrRefSet(AA->getModRefInfo(P, Call))) | |||
593 | return false; | |||
594 | ||||
595 | Calls.push_back(Call); | |||
596 | } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) { | |||
597 | // If we can't lift this before P, it's game over. | |||
598 | auto ML = MemoryLocation::get(C); | |||
599 | if (isModOrRefSet(AA->getModRefInfo(P, ML))) | |||
600 | return false; | |||
601 | ||||
602 | MemLocs.push_back(ML); | |||
603 | } else | |||
604 | // We don't know how to lift this instruction. | |||
605 | return false; | |||
606 | } | |||
607 | ||||
608 | ToLift.push_back(C); | |||
609 | for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k) | |||
610 | if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) { | |||
611 | if (A->getParent() == SI->getParent()) { | |||
612 | // Cannot hoist user of P above P | |||
613 | if(A == P) return false; | |||
614 | Args.insert(A); | |||
615 | } | |||
616 | } | |||
617 | } | |||
618 | ||||
619 | // Find MSSA insertion point. Normally P will always have a corresponding | |||
620 | // memory access before which we can insert. However, with non-standard AA | |||
621 | // pipelines, there may be a mismatch between AA and MSSA, in which case we | |||
622 | // will scan for a memory access before P. In either case, we know for sure | |||
623 | // that at least the load will have a memory access. | |||
624 | // TODO: Simplify this once P will be determined by MSSA, in which case the | |||
625 | // discrepancy can no longer occur. | |||
626 | MemoryUseOrDef *MemInsertPoint = nullptr; | |||
627 | if (MSSAU) { | |||
628 | if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { | |||
629 | MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); | |||
630 | } else { | |||
631 | const Instruction *ConstP = P; | |||
632 | for (const Instruction &I : make_range(++ConstP->getReverseIterator(), | |||
633 | ++LI->getReverseIterator())) { | |||
634 | if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { | |||
635 | MemInsertPoint = MA; | |||
636 | break; | |||
637 | } | |||
638 | } | |||
639 | } | |||
640 | } | |||
641 | ||||
642 | // We made it, we need to lift. | |||
643 | for (auto *I : llvm::reverse(ToLift)) { | |||
644 | LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n")do { } while (false); | |||
645 | I->moveBefore(P); | |||
646 | if (MSSAU) { | |||
647 | assert(MemInsertPoint && "Must have found insert point")((void)0); | |||
648 | if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { | |||
649 | MSSAU->moveAfter(MA, MemInsertPoint); | |||
650 | MemInsertPoint = MA; | |||
651 | } | |||
652 | } | |||
653 | } | |||
654 | ||||
655 | return true; | |||
656 | } | |||
657 | ||||
658 | bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { | |||
659 | if (!SI->isSimple()) return false; | |||
660 | ||||
661 | // Avoid merging nontemporal stores since the resulting | |||
662 | // memcpy/memset would not be able to preserve the nontemporal hint. | |||
663 | // In theory we could teach how to propagate the !nontemporal metadata to | |||
664 | // memset calls. However, that change would force the backend to | |||
665 | // conservatively expand !nontemporal memset calls back to sequences of | |||
666 | // store instructions (effectively undoing the merging). | |||
667 | if (SI->getMetadata(LLVMContext::MD_nontemporal)) | |||
668 | return false; | |||
669 | ||||
670 | const DataLayout &DL = SI->getModule()->getDataLayout(); | |||
671 | ||||
672 | Value *StoredVal = SI->getValueOperand(); | |||
673 | ||||
674 | // Not all the transforms below are correct for non-integral pointers, bail | |||
675 | // until we've audited the individual pieces. | |||
676 | if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) | |||
677 | return false; | |||
678 | ||||
679 | // Load to store forwarding can be interpreted as memcpy. | |||
680 | if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { | |||
681 | if (LI->isSimple() && LI->hasOneUse() && | |||
682 | LI->getParent() == SI->getParent()) { | |||
683 | ||||
684 | auto *T = LI->getType(); | |||
685 | if (T->isAggregateType()) { | |||
686 | MemoryLocation LoadLoc = MemoryLocation::get(LI); | |||
687 | ||||
688 | // We use alias analysis to check if an instruction may store to | |||
689 | // the memory we load from in between the load and the store. If | |||
690 | // such an instruction is found, we try to promote there instead | |||
691 | // of at the store position. | |||
692 | // TODO: Can use MSSA for this. | |||
693 | Instruction *P = SI; | |||
694 | for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { | |||
695 | if (isModSet(AA->getModRefInfo(&I, LoadLoc))) { | |||
696 | P = &I; | |||
697 | break; | |||
698 | } | |||
699 | } | |||
700 | ||||
701 | // We found an instruction that may write to the loaded memory. | |||
702 | // We can try to promote at this position instead of the store | |||
703 | // position if nothing aliases the store memory after this and the store | |||
704 | // destination is not in the range. | |||
705 | if (P && P != SI) { | |||
706 | if (!moveUp(SI, P, LI)) | |||
707 | P = nullptr; | |||
708 | } | |||
709 | ||||
710 | // If a valid insertion position is found, then we can promote | |||
711 | // the load/store pair to a memcpy. | |||
712 | if (P) { | |||
713 | // If we load from memory that may alias the memory we store to, | |||
714 | // memmove must be used to preserve semantic. If not, memcpy can | |||
715 | // be used. | |||
716 | bool UseMemMove = false; | |||
717 | if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc)) | |||
718 | UseMemMove = true; | |||
719 | ||||
720 | uint64_t Size = DL.getTypeStoreSize(T); | |||
721 | ||||
722 | IRBuilder<> Builder(P); | |||
723 | Instruction *M; | |||
724 | if (UseMemMove) | |||
725 | M = Builder.CreateMemMove( | |||
726 | SI->getPointerOperand(), SI->getAlign(), | |||
727 | LI->getPointerOperand(), LI->getAlign(), Size); | |||
728 | else | |||
729 | M = Builder.CreateMemCpy( | |||
730 | SI->getPointerOperand(), SI->getAlign(), | |||
731 | LI->getPointerOperand(), LI->getAlign(), Size); | |||
732 | ||||
733 | LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "do { } while (false) | |||
734 | << *M << "\n")do { } while (false); | |||
735 | ||||
736 | if (MSSAU) { | |||
737 | auto *LastDef = | |||
738 | cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); | |||
739 | auto *NewAccess = | |||
740 | MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); | |||
741 | MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); | |||
742 | } | |||
743 | ||||
744 | eraseInstruction(SI); | |||
745 | eraseInstruction(LI); | |||
746 | ++NumMemCpyInstr; | |||
747 | ||||
748 | // Make sure we do not invalidate the iterator. | |||
749 | BBI = M->getIterator(); | |||
750 | return true; | |||
751 | } | |||
752 | } | |||
753 | ||||
754 | // Detect cases where we're performing call slot forwarding, but | |||
755 | // happen to be using a load-store pair to implement it, rather than | |||
756 | // a memcpy. | |||
757 | CallInst *C = nullptr; | |||
758 | if (EnableMemorySSA) { | |||
759 | if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>( | |||
760 | MSSA->getWalker()->getClobberingMemoryAccess(LI))) { | |||
761 | // The load most post-dom the call. Limit to the same block for now. | |||
762 | // TODO: Support non-local call-slot optimization? | |||
763 | if (LoadClobber->getBlock() == SI->getParent()) | |||
764 | C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst()); | |||
765 | } | |||
766 | } else { | |||
767 | MemDepResult ldep = MD->getDependency(LI); | |||
768 | if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) | |||
769 | C = dyn_cast<CallInst>(ldep.getInst()); | |||
770 | } | |||
771 | ||||
772 | if (C) { | |||
773 | // Check that nothing touches the dest of the "copy" between | |||
774 | // the call and the store. | |||
775 | MemoryLocation StoreLoc = MemoryLocation::get(SI); | |||
776 | if (EnableMemorySSA) { | |||
777 | if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), | |||
778 | MSSA->getMemoryAccess(SI))) | |||
779 | C = nullptr; | |||
780 | } else { | |||
781 | for (BasicBlock::iterator I = --SI->getIterator(), | |||
782 | E = C->getIterator(); | |||
783 | I != E; --I) { | |||
784 | if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { | |||
785 | C = nullptr; | |||
786 | break; | |||
787 | } | |||
788 | } | |||
789 | } | |||
790 | } | |||
791 | ||||
792 | if (C) { | |||
793 | bool changed = performCallSlotOptzn( | |||
794 | LI, SI, SI->getPointerOperand()->stripPointerCasts(), | |||
795 | LI->getPointerOperand()->stripPointerCasts(), | |||
796 | DL.getTypeStoreSize(SI->getOperand(0)->getType()), | |||
797 | commonAlignment(SI->getAlign(), LI->getAlign()), C); | |||
798 | if (changed) { | |||
799 | eraseInstruction(SI); | |||
800 | eraseInstruction(LI); | |||
801 | ++NumMemCpyInstr; | |||
802 | return true; | |||
803 | } | |||
804 | } | |||
805 | } | |||
806 | } | |||
807 | ||||
808 | // There are two cases that are interesting for this code to handle: memcpy | |||
809 | // and memset. Right now we only handle memset. | |||
810 | ||||
811 | // Ensure that the value being stored is something that can be memset'able a | |||
812 | // byte at a time like "0" or "-1" or any width, as well as things like | |||
813 | // 0xA0A0A0A0 and 0.0. | |||
814 | auto *V = SI->getOperand(0); | |||
815 | if (Value *ByteVal = isBytewiseValue(V, DL)) { | |||
816 | if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), | |||
817 | ByteVal)) { | |||
818 | BBI = I->getIterator(); // Don't invalidate iterator. | |||
819 | return true; | |||
820 | } | |||
821 | ||||
822 | // If we have an aggregate, we try to promote it to memset regardless | |||
823 | // of opportunity for merging as it can expose optimization opportunities | |||
824 | // in subsequent passes. | |||
825 | auto *T = V->getType(); | |||
826 | if (T->isAggregateType()) { | |||
827 | uint64_t Size = DL.getTypeStoreSize(T); | |||
828 | IRBuilder<> Builder(SI); | |||
829 | auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, | |||
830 | SI->getAlign()); | |||
831 | ||||
832 | LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n")do { } while (false); | |||
833 | ||||
834 | if (MSSAU) { | |||
835 | assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)))((void)0); | |||
836 | auto *LastDef = | |||
837 | cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); | |||
838 | auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); | |||
839 | MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); | |||
840 | } | |||
841 | ||||
842 | eraseInstruction(SI); | |||
843 | NumMemSetInfer++; | |||
844 | ||||
845 | // Make sure we do not invalidate the iterator. | |||
846 | BBI = M->getIterator(); | |||
847 | return true; | |||
848 | } | |||
849 | } | |||
850 | ||||
851 | return false; | |||
852 | } | |||
853 | ||||
854 | bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { | |||
855 | // See if there is another memset or store neighboring this memset which | |||
856 | // allows us to widen out the memset to do a single larger store. | |||
857 | if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile()) | |||
858 | if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), | |||
859 | MSI->getValue())) { | |||
860 | BBI = I->getIterator(); // Don't invalidate iterator. | |||
861 | return true; | |||
862 | } | |||
863 | return false; | |||
864 | } | |||
865 | ||||
866 | /// Takes a memcpy and a call that it depends on, | |||
867 | /// and checks for the possibility of a call slot optimization by having | |||
868 | /// the call write its result directly into the destination of the memcpy. | |||
869 | bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, | |||
870 | Instruction *cpyStore, Value *cpyDest, | |||
871 | Value *cpySrc, TypeSize cpySize, | |||
872 | Align cpyAlign, CallInst *C) { | |||
873 | // The general transformation to keep in mind is | |||
874 | // | |||
875 | // call @func(..., src, ...) | |||
876 | // memcpy(dest, src, ...) | |||
877 | // | |||
878 | // -> | |||
879 | // | |||
880 | // memcpy(dest, src, ...) | |||
881 | // call @func(..., dest, ...) | |||
882 | // | |||
883 | // Since moving the memcpy is technically awkward, we additionally check that | |||
884 | // src only holds uninitialized values at the moment of the call, meaning that | |||
885 | // the memcpy can be discarded rather than moved. | |||
886 | ||||
887 | // We can't optimize scalable types. | |||
888 | if (cpySize.isScalable()) | |||
889 | return false; | |||
890 | ||||
891 | // Lifetime marks shouldn't be operated on. | |||
892 | if (Function *F = C->getCalledFunction()) | |||
893 | if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start) | |||
894 | return false; | |||
895 | ||||
896 | // Require that src be an alloca. This simplifies the reasoning considerably. | |||
897 | AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); | |||
898 | if (!srcAlloca) | |||
899 | return false; | |||
900 | ||||
901 | ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); | |||
902 | if (!srcArraySize) | |||
903 | return false; | |||
904 | ||||
905 | const DataLayout &DL = cpyLoad->getModule()->getDataLayout(); | |||
906 | uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * | |||
907 | srcArraySize->getZExtValue(); | |||
908 | ||||
909 | if (cpySize < srcSize) | |||
910 | return false; | |||
911 | ||||
912 | // Check that accessing the first srcSize bytes of dest will not cause a | |||
913 | // trap. Otherwise the transform is invalid since it might cause a trap | |||
914 | // to occur earlier than it otherwise would. | |||
915 | if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize), | |||
916 | DL, C, DT)) | |||
917 | return false; | |||
918 | ||||
919 | // Make sure that nothing can observe cpyDest being written early. There are | |||
920 | // a number of cases to consider: | |||
921 | // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of | |||
922 | // the transform. | |||
923 | // 2. C itself may not access cpyDest (prior to the transform). This is | |||
924 | // checked further below. | |||
925 | // 3. If cpyDest is accessible to the caller of this function (potentially | |||
926 | // captured and not based on an alloca), we need to ensure that we cannot | |||
927 | // unwind between C and cpyStore. This is checked here. | |||
928 | // 4. If cpyDest is potentially captured, there may be accesses to it from | |||
929 | // another thread. In this case, we need to check that cpyStore is | |||
930 | // guaranteed to be executed if C is. As it is a non-atomic access, it | |||
931 | // renders accesses from other threads undefined. | |||
932 | // TODO: This is currently not checked. | |||
933 | if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) | |||
934 | return false; | |||
935 | ||||
936 | // Check that dest points to memory that is at least as aligned as src. | |||
937 | Align srcAlign = srcAlloca->getAlign(); | |||
938 | bool isDestSufficientlyAligned = srcAlign <= cpyAlign; | |||
939 | // If dest is not aligned enough and we can't increase its alignment then | |||
940 | // bail out. | |||
941 | if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) | |||
942 | return false; | |||
943 | ||||
944 | // Check that src is not accessed except via the call and the memcpy. This | |||
945 | // guarantees that it holds only undefined values when passed in (so the final | |||
946 | // memcpy can be dropped), that it is not read or written between the call and | |||
947 | // the memcpy, and that writing beyond the end of it is undefined. | |||
948 | SmallVector<User *, 8> srcUseList(srcAlloca->users()); | |||
949 | while (!srcUseList.empty()) { | |||
950 | User *U = srcUseList.pop_back_val(); | |||
951 | ||||
952 | if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) { | |||
953 | append_range(srcUseList, U->users()); | |||
954 | continue; | |||
955 | } | |||
956 | if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { | |||
957 | if (!G->hasAllZeroIndices()) | |||
958 | return false; | |||
959 | ||||
960 | append_range(srcUseList, U->users()); | |||
961 | continue; | |||
962 | } | |||
963 | if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U)) | |||
964 | if (IT->isLifetimeStartOrEnd()) | |||
965 | continue; | |||
966 | ||||
967 | if (U != C && U != cpyLoad) | |||
968 | return false; | |||
969 | } | |||
970 | ||||
971 | // Check that src isn't captured by the called function since the | |||
972 | // transformation can cause aliasing issues in that case. | |||
973 | for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI) | |||
974 | if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI)) | |||
975 | return false; | |||
976 | ||||
977 | // Since we're changing the parameter to the callsite, we need to make sure | |||
978 | // that what would be the new parameter dominates the callsite. | |||
979 | if (!DT->dominates(cpyDest, C)) { | |||
980 | // Support moving a constant index GEP before the call. | |||
981 | auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest); | |||
982 | if (GEP && GEP->hasAllConstantIndices() && | |||
983 | DT->dominates(GEP->getPointerOperand(), C)) | |||
984 | GEP->moveBefore(C); | |||
985 | else | |||
986 | return false; | |||
987 | } | |||
988 | ||||
989 | // In addition to knowing that the call does not access src in some | |||
990 | // unexpected manner, for example via a global, which we deduce from | |||
991 | // the use analysis, we also need to know that it does not sneakily | |||
992 | // access dest. We rely on AA to figure this out for us. | |||
993 | ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize)); | |||
994 | // If necessary, perform additional analysis. | |||
995 | if (isModOrRefSet(MR)) | |||
996 | MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT); | |||
997 | if (isModOrRefSet(MR)) | |||
998 | return false; | |||
999 | ||||
1000 | // We can't create address space casts here because we don't know if they're | |||
1001 | // safe for the target. | |||
1002 | if (cpySrc->getType()->getPointerAddressSpace() != | |||
1003 | cpyDest->getType()->getPointerAddressSpace()) | |||
1004 | return false; | |||
1005 | for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI) | |||
1006 | if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc && | |||
1007 | cpySrc->getType()->getPointerAddressSpace() != | |||
1008 | C->getArgOperand(ArgI)->getType()->getPointerAddressSpace()) | |||
1009 | return false; | |||
1010 | ||||
1011 | // All the checks have passed, so do the transformation. | |||
1012 | bool changedArgument = false; | |||
1013 | for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI) | |||
1014 | if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) { | |||
1015 | Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest | |||
1016 | : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), | |||
1017 | cpyDest->getName(), C); | |||
1018 | changedArgument = true; | |||
1019 | if (C->getArgOperand(ArgI)->getType() == Dest->getType()) | |||
1020 | C->setArgOperand(ArgI, Dest); | |||
1021 | else | |||
1022 | C->setArgOperand(ArgI, CastInst::CreatePointerCast( | |||
1023 | Dest, C->getArgOperand(ArgI)->getType(), | |||
1024 | Dest->getName(), C)); | |||
1025 | } | |||
1026 | ||||
1027 | if (!changedArgument) | |||
1028 | return false; | |||
1029 | ||||
1030 | // If the destination wasn't sufficiently aligned then increase its alignment. | |||
1031 | if (!isDestSufficientlyAligned) { | |||
1032 | assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!")((void)0); | |||
1033 | cast<AllocaInst>(cpyDest)->setAlignment(srcAlign); | |||
1034 | } | |||
1035 | ||||
1036 | // Drop any cached information about the call, because we may have changed | |||
1037 | // its dependence information by changing its parameter. | |||
1038 | if (MD) | |||
1039 | MD->removeInstruction(C); | |||
1040 | ||||
1041 | // Update AA metadata | |||
1042 | // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be | |||
1043 | // handled here, but combineMetadata doesn't support them yet | |||
1044 | unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, | |||
1045 | LLVMContext::MD_noalias, | |||
1046 | LLVMContext::MD_invariant_group, | |||
1047 | LLVMContext::MD_access_group}; | |||
1048 | combineMetadata(C, cpyLoad, KnownIDs, true); | |||
1049 | ||||
1050 | ++NumCallSlot; | |||
1051 | return true; | |||
1052 | } | |||
1053 | ||||
1054 | /// We've found that the (upward scanning) memory dependence of memcpy 'M' is | |||
1055 | /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can. | |||
1056 | bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, | |||
1057 | MemCpyInst *MDep) { | |||
1058 | // We can only transforms memcpy's where the dest of one is the source of the | |||
1059 | // other. | |||
1060 | if (M->getSource() != MDep->getDest() || MDep->isVolatile()) | |||
1061 | return false; | |||
1062 | ||||
1063 | // If dep instruction is reading from our current input, then it is a noop | |||
1064 | // transfer and substituting the input won't change this instruction. Just | |||
1065 | // ignore the input and let someone else zap MDep. This handles cases like: | |||
1066 | // memcpy(a <- a) | |||
1067 | // memcpy(b <- a) | |||
1068 | if (M->getSource() == MDep->getSource()) | |||
1069 | return false; | |||
1070 | ||||
1071 | // Second, the length of the memcpy's must be the same, or the preceding one | |||
1072 | // must be larger than the following one. | |||
1073 | if (MDep->getLength() != M->getLength()) { | |||
1074 | ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); | |||
1075 | ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); | |||
1076 | if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) | |||
1077 | return false; | |||
1078 | } | |||
1079 | ||||
1080 | // Verify that the copied-from memory doesn't change in between the two | |||
1081 | // transfers. For example, in: | |||
1082 | // memcpy(a <- b) | |||
1083 | // *b = 42; | |||
1084 | // memcpy(c <- a) | |||
1085 | // It would be invalid to transform the second memcpy into memcpy(c <- b). | |||
1086 | // | |||
1087 | // TODO: If the code between M and MDep is transparent to the destination "c", | |||
1088 | // then we could still perform the xform by moving M up to the first memcpy. | |||
1089 | if (EnableMemorySSA) { | |||
1090 | // TODO: It would be sufficient to check the MDep source up to the memcpy | |||
1091 | // size of M, rather than MDep. | |||
1092 | if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), | |||
1093 | MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) | |||
1094 | return false; | |||
1095 | } else { | |||
1096 | // NOTE: This is conservative, it will stop on any read from the source loc, | |||
1097 | // not just the defining memcpy. | |||
1098 | MemDepResult SourceDep = | |||
1099 | MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, | |||
1100 | M->getIterator(), M->getParent()); | |||
1101 | if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) | |||
1102 | return false; | |||
1103 | } | |||
1104 | ||||
1105 | // If the dest of the second might alias the source of the first, then the | |||
1106 | // source and dest might overlap. We still want to eliminate the intermediate | |||
1107 | // value, but we have to generate a memmove instead of memcpy. | |||
1108 | bool UseMemMove = false; | |||
1109 | if (!AA->isNoAlias(MemoryLocation::getForDest(M), | |||
1110 | MemoryLocation::getForSource(MDep))) | |||
1111 | UseMemMove = true; | |||
1112 | ||||
1113 | // If all checks passed, then we can transform M. | |||
1114 | LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"do { } while (false) | |||
1115 | << *MDep << '\n' << *M << '\n')do { } while (false); | |||
1116 | ||||
1117 | // TODO: Is this worth it if we're creating a less aligned memcpy? For | |||
1118 | // example we could be moving from movaps -> movq on x86. | |||
1119 | IRBuilder<> Builder(M); | |||
1120 | Instruction *NewM; | |||
1121 | if (UseMemMove) | |||
1122 | NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), | |||
1123 | MDep->getRawSource(), MDep->getSourceAlign(), | |||
1124 | M->getLength(), M->isVolatile()); | |||
1125 | else if (isa<MemCpyInlineInst>(M)) { | |||
1126 | // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is | |||
1127 | // never allowed since that would allow the latter to be lowered as a call | |||
1128 | // to an external function. | |||
1129 | NewM = Builder.CreateMemCpyInline( | |||
1130 | M->getRawDest(), M->getDestAlign(), MDep->getRawSource(), | |||
1131 | MDep->getSourceAlign(), M->getLength(), M->isVolatile()); | |||
1132 | } else | |||
1133 | NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), | |||
1134 | MDep->getRawSource(), MDep->getSourceAlign(), | |||
1135 | M->getLength(), M->isVolatile()); | |||
1136 | ||||
1137 | if (MSSAU) { | |||
1138 | assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)))((void)0); | |||
1139 | auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); | |||
1140 | auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); | |||
1141 | MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); | |||
1142 | } | |||
1143 | ||||
1144 | // Remove the instruction we're replacing. | |||
1145 | eraseInstruction(M); | |||
1146 | ++NumMemCpyInstr; | |||
1147 | return true; | |||
1148 | } | |||
1149 | ||||
1150 | /// We've found that the (upward scanning) memory dependence of \p MemCpy is | |||
1151 | /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that | |||
1152 | /// weren't copied over by \p MemCpy. | |||
1153 | /// | |||
1154 | /// In other words, transform: | |||
1155 | /// \code | |||
1156 | /// memset(dst, c, dst_size); | |||
1157 | /// memcpy(dst, src, src_size); | |||
1158 | /// \endcode | |||
1159 | /// into: | |||
1160 | /// \code | |||
1161 | /// memcpy(dst, src, src_size); | |||
1162 | /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size); | |||
1163 | /// \endcode | |||
1164 | bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, | |||
1165 | MemSetInst *MemSet) { | |||
1166 | // We can only transform memset/memcpy with the same destination. | |||
1167 | if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest())) | |||
1168 | return false; | |||
1169 | ||||
1170 | // Check that src and dst of the memcpy aren't the same. While memcpy | |||
1171 | // operands cannot partially overlap, exact equality is allowed. | |||
1172 | if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(), | |||
1173 | LocationSize::precise(1)), | |||
1174 | MemoryLocation(MemCpy->getDest(), | |||
1175 | LocationSize::precise(1)))) | |||
1176 | return false; | |||
1177 | ||||
1178 | if (EnableMemorySSA) { | |||
1179 | // We know that dst up to src_size is not written. We now need to make sure | |||
1180 | // that dst up to dst_size is not accessed. (If we did not move the memset, | |||
1181 | // checking for reads would be sufficient.) | |||
1182 | if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), | |||
1183 | MSSA->getMemoryAccess(MemSet), | |||
1184 | MSSA->getMemoryAccess(MemCpy))) { | |||
1185 | return false; | |||
1186 | } | |||
1187 | } else { | |||
1188 | // We have already checked that dst up to src_size is not accessed. We | |||
1189 | // need to make sure that there are no accesses up to dst_size either. | |||
1190 | MemDepResult DstDepInfo = MD->getPointerDependencyFrom( | |||
1191 | MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), | |||
1192 | MemCpy->getParent()); | |||
1193 | if (DstDepInfo.getInst() != MemSet) | |||
1194 | return false; | |||
1195 | } | |||
1196 | ||||
1197 | // Use the same i8* dest as the memcpy, killing the memset dest if different. | |||
1198 | Value *Dest = MemCpy->getRawDest(); | |||
1199 | Value *DestSize = MemSet->getLength(); | |||
1200 | Value *SrcSize = MemCpy->getLength(); | |||
1201 | ||||
1202 | if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy)) | |||
1203 | return false; | |||
1204 | ||||
1205 | // If the sizes are the same, simply drop the memset instead of generating | |||
1206 | // a replacement with zero size. | |||
1207 | if (DestSize == SrcSize) { | |||
1208 | eraseInstruction(MemSet); | |||
1209 | return true; | |||
1210 | } | |||
1211 | ||||
1212 | // By default, create an unaligned memset. | |||
1213 | unsigned Align = 1; | |||
1214 | // If Dest is aligned, and SrcSize is constant, use the minimum alignment | |||
1215 | // of the sum. | |||
1216 | const unsigned DestAlign = | |||
1217 | std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); | |||
1218 | if (DestAlign > 1) | |||
1219 | if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize)) | |||
1220 | Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); | |||
1221 | ||||
1222 | IRBuilder<> Builder(MemCpy); | |||
1223 | ||||
1224 | // If the sizes have different types, zext the smaller one. | |||
1225 | if (DestSize->getType() != SrcSize->getType()) { | |||
1226 | if (DestSize->getType()->getIntegerBitWidth() > | |||
1227 | SrcSize->getType()->getIntegerBitWidth()) | |||
1228 | SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType()); | |||
1229 | else | |||
1230 | DestSize = Builder.CreateZExt(DestSize, SrcSize->getType()); | |||
1231 | } | |||
1232 | ||||
1233 | Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize); | |||
1234 | Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize); | |||
1235 | Value *MemsetLen = Builder.CreateSelect( | |||
1236 | Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); | |||
1237 | unsigned DestAS = Dest->getType()->getPointerAddressSpace(); | |||
1238 | Instruction *NewMemSet = Builder.CreateMemSet( | |||
1239 | Builder.CreateGEP(Builder.getInt8Ty(), | |||
1240 | Builder.CreatePointerCast(Dest, | |||
1241 | Builder.getInt8PtrTy(DestAS)), | |||
1242 | SrcSize), | |||
1243 | MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); | |||
1244 | ||||
1245 | if (MSSAU) { | |||
1246 | assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&((void)0) | |||
1247 | "MemCpy must be a MemoryDef")((void)0); | |||
1248 | // The new memset is inserted after the memcpy, but it is known that its | |||
1249 | // defining access is the memset about to be removed which immediately | |||
1250 | // precedes the memcpy. | |||
1251 | auto *LastDef = | |||
1252 | cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); | |||
1253 | auto *NewAccess = MSSAU->createMemoryAccessBefore( | |||
1254 | NewMemSet, LastDef->getDefiningAccess(), LastDef); | |||
1255 | MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); | |||
1256 | } | |||
1257 | ||||
1258 | eraseInstruction(MemSet); | |||
1259 | return true; | |||
1260 | } | |||
1261 | ||||
1262 | /// Determine whether the instruction has undefined content for the given Size, | |||
1263 | /// either because it was freshly alloca'd or started its lifetime. | |||
1264 | static bool hasUndefContents(Instruction *I, Value *Size) { | |||
1265 | if (isa<AllocaInst>(I)) | |||
1266 | return true; | |||
1267 | ||||
1268 | if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) { | |||
1269 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) | |||
1270 | if (II->getIntrinsicID() == Intrinsic::lifetime_start) | |||
1271 | if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0))) | |||
1272 | if (LTSize->getZExtValue() >= CSize->getZExtValue()) | |||
1273 | return true; | |||
1274 | } | |||
1275 | ||||
1276 | return false; | |||
1277 | } | |||
1278 | ||||
1279 | static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, | |||
1280 | MemoryDef *Def, Value *Size) { | |||
1281 | if (MSSA->isLiveOnEntryDef(Def)) | |||
1282 | return isa<AllocaInst>(getUnderlyingObject(V)); | |||
1283 | ||||
1284 | if (IntrinsicInst *II = | |||
1285 | dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) { | |||
1286 | if (II->getIntrinsicID() == Intrinsic::lifetime_start) { | |||
1287 | ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0)); | |||
1288 | ||||
1289 | if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) { | |||
1290 | if (AA->isMustAlias(V, II->getArgOperand(1)) && | |||
1291 | LTSize->getZExtValue() >= CSize->getZExtValue()) | |||
1292 | return true; | |||
1293 | } | |||
1294 | ||||
1295 | // If the lifetime.start covers a whole alloca (as it almost always | |||
1296 | // does) and we're querying a pointer based on that alloca, then we know | |||
1297 | // the memory is definitely undef, regardless of how exactly we alias. | |||
1298 | // The size also doesn't matter, as an out-of-bounds access would be UB. | |||
1299 | AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)); | |||
1300 | if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) { | |||
1301 | const DataLayout &DL = Alloca->getModule()->getDataLayout(); | |||
1302 | if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL)) | |||
1303 | if (*AllocaSize == LTSize->getValue() * 8) | |||
1304 | return true; | |||
1305 | } | |||
1306 | } | |||
1307 | } | |||
1308 | ||||
1309 | return false; | |||
1310 | } | |||
1311 | ||||
1312 | /// Transform memcpy to memset when its source was just memset. | |||
1313 | /// In other words, turn: | |||
1314 | /// \code | |||
1315 | /// memset(dst1, c, dst1_size); | |||
1316 | /// memcpy(dst2, dst1, dst2_size); | |||
1317 | /// \endcode | |||
1318 | /// into: | |||
1319 | /// \code | |||
1320 | /// memset(dst1, c, dst1_size); | |||
1321 | /// memset(dst2, c, dst2_size); | |||
1322 | /// \endcode | |||
1323 | /// When dst2_size <= dst1_size. | |||
1324 | bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, | |||
1325 | MemSetInst *MemSet) { | |||
1326 | // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and | |||
1327 | // memcpying from the same address. Otherwise it is hard to reason about. | |||
1328 | if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) | |||
1329 | return false; | |||
1330 | ||||
1331 | Value *MemSetSize = MemSet->getLength(); | |||
1332 | Value *CopySize = MemCpy->getLength(); | |||
1333 | ||||
1334 | if (MemSetSize != CopySize) { | |||
1335 | // Make sure the memcpy doesn't read any more than what the memset wrote. | |||
1336 | // Don't worry about sizes larger than i64. | |||
1337 | ||||
1338 | // A known memset size is required. | |||
1339 | ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize); | |||
1340 | if (!CMemSetSize) | |||
1341 | return false; | |||
1342 | ||||
1343 | // A known memcpy size is also required. | |||
1344 | ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize); | |||
1345 | if (!CCopySize) | |||
1346 | return false; | |||
1347 | if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) { | |||
1348 | // If the memcpy is larger than the memset, but the memory was undef prior | |||
1349 | // to the memset, we can just ignore the tail. Technically we're only | |||
1350 | // interested in the bytes from MemSetSize..CopySize here, but as we can't | |||
1351 | // easily represent this location, we use the full 0..CopySize range. | |||
1352 | MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy); | |||
1353 | bool CanReduceSize = false; | |||
1354 | if (EnableMemorySSA) { | |||
1355 | MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); | |||
1356 | MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( | |||
1357 | MemSetAccess->getDefiningAccess(), MemCpyLoc); | |||
1358 | if (auto *MD = dyn_cast<MemoryDef>(Clobber)) | |||
1359 | if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize)) | |||
1360 | CanReduceSize = true; | |||
1361 | } else { | |||
1362 | MemDepResult DepInfo = MD->getPointerDependencyFrom( | |||
1363 | MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent()); | |||
1364 | if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) | |||
1365 | CanReduceSize = true; | |||
1366 | } | |||
1367 | ||||
1368 | if (!CanReduceSize) | |||
1369 | return false; | |||
1370 | CopySize = MemSetSize; | |||
1371 | } | |||
1372 | } | |||
1373 | ||||
1374 | IRBuilder<> Builder(MemCpy); | |||
1375 | Instruction *NewM = | |||
1376 | Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), | |||
1377 | CopySize, MaybeAlign(MemCpy->getDestAlignment())); | |||
1378 | if (MSSAU) { | |||
1379 | auto *LastDef = | |||
1380 | cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); | |||
1381 | auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); | |||
1382 | MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); | |||
1383 | } | |||
1384 | ||||
1385 | return true; | |||
1386 | } | |||
1387 | ||||
1388 | /// Perform simplification of memcpy's. If we have memcpy A | |||
1389 | /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite | |||
1390 | /// B to be a memcpy from X to Z (or potentially a memmove, depending on | |||
1391 | /// circumstances). This allows later passes to remove the first memcpy | |||
1392 | /// altogether. | |||
1393 | bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { | |||
1394 | // We can only optimize non-volatile memcpy's. | |||
1395 | if (M->isVolatile()) return false; | |||
1396 | ||||
1397 | // If the source and destination of the memcpy are the same, then zap it. | |||
1398 | if (M->getSource() == M->getDest()) { | |||
1399 | ++BBI; | |||
1400 | eraseInstruction(M); | |||
1401 | return true; | |||
1402 | } | |||
1403 | ||||
1404 | // If copying from a constant, try to turn the memcpy into a memset. | |||
1405 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) | |||
1406 | if (GV->isConstant() && GV->hasDefinitiveInitializer()) | |||
1407 | if (Value *ByteVal = isBytewiseValue(GV->getInitializer(), | |||
1408 | M->getModule()->getDataLayout())) { | |||
1409 | IRBuilder<> Builder(M); | |||
1410 | Instruction *NewM = | |||
1411 | Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), | |||
1412 | MaybeAlign(M->getDestAlignment()), false); | |||
1413 | if (MSSAU) { | |||
1414 | auto *LastDef = | |||
1415 | cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); | |||
1416 | auto *NewAccess = | |||
1417 | MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); | |||
1418 | MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); | |||
1419 | } | |||
1420 | ||||
1421 | eraseInstruction(M); | |||
1422 | ++NumCpyToSet; | |||
1423 | return true; | |||
1424 | } | |||
1425 | ||||
1426 | if (EnableMemorySSA) { | |||
1427 | MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); | |||
1428 | MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); | |||
1429 | MemoryLocation DestLoc = MemoryLocation::getForDest(M); | |||
1430 | const MemoryAccess *DestClobber = | |||
1431 | MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); | |||
1432 | ||||
1433 | // Try to turn a partially redundant memset + memcpy into | |||
1434 | // memcpy + smaller memset. We don't need the memcpy size for this. | |||
1435 | // The memcpy most post-dom the memset, so limit this to the same basic | |||
1436 | // block. A non-local generalization is likely not worthwhile. | |||
1437 | if (auto *MD = dyn_cast<MemoryDef>(DestClobber)) | |||
1438 | if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst())) | |||
1439 | if (DestClobber->getBlock() == M->getParent()) | |||
1440 | if (processMemSetMemCpyDependence(M, MDep)) | |||
1441 | return true; | |||
1442 | ||||
1443 | MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( | |||
1444 | AnyClobber, MemoryLocation::getForSource(M)); | |||
1445 | ||||
1446 | // There are four possible optimizations we can do for memcpy: | |||
1447 | // a) memcpy-memcpy xform which exposes redundance for DSE. | |||
1448 | // b) call-memcpy xform for return slot optimization. | |||
1449 | // c) memcpy from freshly alloca'd space or space that has just started | |||
1450 | // its lifetime copies undefined data, and we can therefore eliminate | |||
1451 | // the memcpy in favor of the data that was already at the destination. | |||
1452 | // d) memcpy from a just-memset'd source can be turned into memset. | |||
1453 | if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { | |||
1454 | if (Instruction *MI = MD->getMemoryInst()) { | |||
1455 | if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) { | |||
1456 | if (auto *C = dyn_cast<CallInst>(MI)) { | |||
1457 | // The memcpy must post-dom the call. Limit to the same block for | |||
1458 | // now. Additionally, we need to ensure that there are no accesses | |||
1459 | // to dest between the call and the memcpy. Accesses to src will be | |||
1460 | // checked by performCallSlotOptzn(). | |||
1461 | // TODO: Support non-local call-slot optimization? | |||
1462 | if (C->getParent() == M->getParent() && | |||
1463 | !accessedBetween(*AA, DestLoc, MD, MA)) { | |||
1464 | // FIXME: Can we pass in either of dest/src alignment here instead | |||
1465 | // of conservatively taking the minimum? | |||
1466 | Align Alignment = std::min(M->getDestAlign().valueOrOne(), | |||
1467 | M->getSourceAlign().valueOrOne()); | |||
1468 | if (performCallSlotOptzn( | |||
1469 | M, M, M->getDest(), M->getSource(), | |||
1470 | TypeSize::getFixed(CopySize->getZExtValue()), Alignment, | |||
1471 | C)) { | |||
1472 | LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"do { } while (false) | |||
1473 | << " call: " << *C << "\n"do { } while (false) | |||
1474 | << " memcpy: " << *M << "\n")do { } while (false); | |||
1475 | eraseInstruction(M); | |||
1476 | ++NumMemCpyInstr; | |||
1477 | return true; | |||
1478 | } | |||
1479 | } | |||
1480 | } | |||
1481 | } | |||
1482 | if (auto *MDep = dyn_cast<MemCpyInst>(MI)) | |||
1483 | return processMemCpyMemCpyDependence(M, MDep); | |||
1484 | if (auto *MDep = dyn_cast<MemSetInst>(MI)) { | |||
1485 | if (performMemCpyToMemSetOptzn(M, MDep)) { | |||
1486 | LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n")do { } while (false); | |||
1487 | eraseInstruction(M); | |||
1488 | ++NumCpyToSet; | |||
1489 | return true; | |||
1490 | } | |||
1491 | } | |||
1492 | } | |||
1493 | ||||
1494 | if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) { | |||
1495 | LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n")do { } while (false); | |||
1496 | eraseInstruction(M); | |||
1497 | ++NumMemCpyInstr; | |||
1498 | return true; | |||
1499 | } | |||
1500 | } | |||
1501 | } else { | |||
1502 | MemDepResult DepInfo = MD->getDependency(M); | |||
1503 | ||||
1504 | // Try to turn a partially redundant memset + memcpy into | |||
1505 | // memcpy + smaller memset. We don't need the memcpy size for this. | |||
1506 | if (DepInfo.isClobber()) | |||
1507 | if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) | |||
1508 | if (processMemSetMemCpyDependence(M, MDep)) | |||
1509 | return true; | |||
1510 | ||||
1511 | // There are four possible optimizations we can do for memcpy: | |||
1512 | // a) memcpy-memcpy xform which exposes redundance for DSE. | |||
1513 | // b) call-memcpy xform for return slot optimization. | |||
1514 | // c) memcpy from freshly alloca'd space or space that has just started | |||
1515 | // its lifetime copies undefined data, and we can therefore eliminate | |||
1516 | // the memcpy in favor of the data that was already at the destination. | |||
1517 | // d) memcpy from a just-memset'd source can be turned into memset. | |||
1518 | if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) { | |||
1519 | if (DepInfo.isClobber()) { | |||
1520 | if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { | |||
1521 | // FIXME: Can we pass in either of dest/src alignment here instead | |||
1522 | // of conservatively taking the minimum? | |||
1523 | Align Alignment = std::min(M->getDestAlign().valueOrOne(), | |||
1524 | M->getSourceAlign().valueOrOne()); | |||
1525 | if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), | |||
1526 | TypeSize::getFixed(CopySize->getZExtValue()), | |||
1527 | Alignment, C)) { | |||
1528 | eraseInstruction(M); | |||
1529 | ++NumMemCpyInstr; | |||
1530 | return true; | |||
1531 | } | |||
1532 | } | |||
1533 | } | |||
1534 | } | |||
1535 | ||||
1536 | MemoryLocation SrcLoc = MemoryLocation::getForSource(M); | |||
1537 | MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( | |||
1538 | SrcLoc, true, M->getIterator(), M->getParent()); | |||
1539 | ||||
1540 | if (SrcDepInfo.isClobber()) { | |||
1541 | if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) | |||
1542 | return processMemCpyMemCpyDependence(M, MDep); | |||
1543 | } else if (SrcDepInfo.isDef()) { | |||
1544 | if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) { | |||
1545 | eraseInstruction(M); | |||
1546 | ++NumMemCpyInstr; | |||
1547 | return true; | |||
1548 | } | |||
1549 | } | |||
1550 | ||||
1551 | if (SrcDepInfo.isClobber()) | |||
1552 | if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) | |||
1553 | if (performMemCpyToMemSetOptzn(M, MDep)) { | |||
1554 | eraseInstruction(M); | |||
1555 | ++NumCpyToSet; | |||
1556 | return true; | |||
1557 | } | |||
1558 | } | |||
1559 | ||||
1560 | return false; | |||
1561 | } | |||
1562 | ||||
1563 | /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed | |||
1564 | /// not to alias. | |||
1565 | bool MemCpyOptPass::processMemMove(MemMoveInst *M) { | |||
1566 | if (!TLI->has(LibFunc_memmove)) | |||
1567 | return false; | |||
1568 | ||||
1569 | // See if the pointers alias. | |||
1570 | if (!AA->isNoAlias(MemoryLocation::getForDest(M), | |||
1571 | MemoryLocation::getForSource(M))) | |||
1572 | return false; | |||
1573 | ||||
1574 | LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *Mdo { } while (false) | |||
1575 | << "\n")do { } while (false); | |||
1576 | ||||
1577 | // If not, then we know we can transform this. | |||
1578 | Type *ArgTys[3] = { M->getRawDest()->getType(), | |||
1579 | M->getRawSource()->getType(), | |||
1580 | M->getLength()->getType() }; | |||
1581 | M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(), | |||
1582 | Intrinsic::memcpy, ArgTys)); | |||
1583 | ||||
1584 | // For MemorySSA nothing really changes (except that memcpy may imply stricter | |||
1585 | // aliasing guarantees). | |||
1586 | ||||
1587 | // MemDep may have over conservative information about this instruction, just | |||
1588 | // conservatively flush it from the cache. | |||
1589 | if (MD) | |||
1590 | MD->removeInstruction(M); | |||
1591 | ||||
1592 | ++NumMoveToCpy; | |||
1593 | return true; | |||
1594 | } | |||
1595 | ||||
1596 | /// This is called on every byval argument in call sites. | |||
1597 | bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { | |||
1598 | const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout(); | |||
1599 | // Find out what feeds this byval argument. | |||
1600 | Value *ByValArg = CB.getArgOperand(ArgNo); | |||
1601 | Type *ByValTy = CB.getParamByValType(ArgNo); | |||
1602 | TypeSize ByValSize = DL.getTypeAllocSize(ByValTy); | |||
1603 | MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); | |||
1604 | MemCpyInst *MDep = nullptr; | |||
1605 | if (EnableMemorySSA) { | |||
1606 | MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); | |||
| ||||
1607 | if (!CallAccess) | |||
1608 | return false; | |||
1609 | MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( | |||
1610 | CallAccess->getDefiningAccess(), Loc); | |||
1611 | if (auto *MD = dyn_cast<MemoryDef>(Clobber)) | |||
1612 | MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst()); | |||
1613 | } else { | |||
1614 | MemDepResult DepInfo = MD->getPointerDependencyFrom( | |||
1615 | Loc, true, CB.getIterator(), CB.getParent()); | |||
1616 | if (!DepInfo.isClobber()) | |||
1617 | return false; | |||
1618 | MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); | |||
1619 | } | |||
1620 | ||||
1621 | // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by | |||
1622 | // a memcpy, see if we can byval from the source of the memcpy instead of the | |||
1623 | // result. | |||
1624 | if (!MDep || MDep->isVolatile() || | |||
1625 | ByValArg->stripPointerCasts() != MDep->getDest()) | |||
1626 | return false; | |||
1627 | ||||
1628 | // The length of the memcpy must be larger or equal to the size of the byval. | |||
1629 | ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); | |||
1630 | if (!C1 || !TypeSize::isKnownGE( | |||
1631 | TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize)) | |||
1632 | return false; | |||
1633 | ||||
1634 | // Get the alignment of the byval. If the call doesn't specify the alignment, | |||
1635 | // then it is some target specific value that we can't know. | |||
1636 | MaybeAlign ByValAlign = CB.getParamAlign(ArgNo); | |||
1637 | if (!ByValAlign) return false; | |||
1638 | ||||
1639 | // If it is greater than the memcpy, then we check to see if we can force the | |||
1640 | // source of the memcpy to the alignment we need. If we fail, we bail out. | |||
1641 | MaybeAlign MemDepAlign = MDep->getSourceAlign(); | |||
1642 | if ((!MemDepAlign || *MemDepAlign < *ByValAlign) && | |||
1643 | getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC, | |||
1644 | DT) < *ByValAlign) | |||
1645 | return false; | |||
1646 | ||||
1647 | // The address space of the memcpy source must match the byval argument | |||
1648 | if (MDep->getSource()->getType()->getPointerAddressSpace() != | |||
1649 | ByValArg->getType()->getPointerAddressSpace()) | |||
1650 | return false; | |||
1651 | ||||
1652 | // Verify that the copied-from memory doesn't change in between the memcpy and | |||
1653 | // the byval call. | |||
1654 | // memcpy(a <- b) | |||
1655 | // *b = 42; | |||
1656 | // foo(*a) | |||
1657 | // It would be invalid to transform the second memcpy into foo(*b). | |||
1658 | if (EnableMemorySSA) { | |||
1659 | if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), | |||
1660 | MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) | |||
1661 | return false; | |||
1662 | } else { | |||
1663 | // NOTE: This is conservative, it will stop on any read from the source loc, | |||
1664 | // not just the defining memcpy. | |||
1665 | MemDepResult SourceDep = MD->getPointerDependencyFrom( | |||
1666 | MemoryLocation::getForSource(MDep), false, | |||
1667 | CB.getIterator(), MDep->getParent()); | |||
1668 | if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) | |||
1669 | return false; | |||
1670 | } | |||
1671 | ||||
1672 | Value *TmpCast = MDep->getSource(); | |||
1673 | if (MDep->getSource()->getType() != ByValArg->getType()) { | |||
1674 | BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), | |||
1675 | "tmpcast", &CB); | |||
1676 | // Set the tmpcast's DebugLoc to MDep's | |||
1677 | TmpBitCast->setDebugLoc(MDep->getDebugLoc()); | |||
1678 | TmpCast = TmpBitCast; | |||
1679 | } | |||
1680 | ||||
1681 | LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"do { } while (false) | |||
1682 | << " " << *MDep << "\n"do { } while (false) | |||
1683 | << " " << CB << "\n")do { } while (false); | |||
1684 | ||||
1685 | // Otherwise we're good! Update the byval argument. | |||
1686 | CB.setArgOperand(ArgNo, TmpCast); | |||
1687 | ++NumMemCpyInstr; | |||
1688 | return true; | |||
1689 | } | |||
1690 | ||||
1691 | /// Executes one iteration of MemCpyOptPass. | |||
1692 | bool MemCpyOptPass::iterateOnFunction(Function &F) { | |||
1693 | bool MadeChange = false; | |||
1694 | ||||
1695 | // Walk all instruction in the function. | |||
1696 | for (BasicBlock &BB : F) { | |||
1697 | // Skip unreachable blocks. For example processStore assumes that an | |||
1698 | // instruction in a BB can't be dominated by a later instruction in the | |||
1699 | // same BB (which is a scenario that can happen for an unreachable BB that | |||
1700 | // has itself as a predecessor). | |||
1701 | if (!DT->isReachableFromEntry(&BB)) | |||
1702 | continue; | |||
1703 | ||||
1704 | for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { | |||
1705 | // Avoid invalidating the iterator. | |||
1706 | Instruction *I = &*BI++; | |||
1707 | ||||
1708 | bool RepeatInstruction = false; | |||
1709 | ||||
1710 | if (StoreInst *SI
| |||
1711 | MadeChange |= processStore(SI, BI); | |||
1712 | else if (MemSetInst *M
| |||
1713 | RepeatInstruction = processMemSet(M, BI); | |||
1714 | else if (MemCpyInst *M
| |||
1715 | RepeatInstruction = processMemCpy(M, BI); | |||
1716 | else if (MemMoveInst *M
| |||
1717 | RepeatInstruction = processMemMove(M); | |||
1718 | else if (auto *CB
| |||
1719 | for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) | |||
1720 | if (CB->isByValArgument(i)) | |||
1721 | MadeChange |= processByValArgument(*CB, i); | |||
1722 | } | |||
1723 | ||||
1724 | // Reprocess the instruction if desired. | |||
1725 | if (RepeatInstruction) { | |||
1726 | if (BI != BB.begin()) | |||
1727 | --BI; | |||
1728 | MadeChange = true; | |||
1729 | } | |||
1730 | } | |||
1731 | } | |||
1732 | ||||
1733 | return MadeChange; | |||
1734 | } | |||
1735 | ||||
1736 | PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { | |||
1737 | auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F) | |||
1738 | : AM.getCachedResult<MemoryDependenceAnalysis>(F); | |||
1739 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); | |||
1740 | auto *AA = &AM.getResult<AAManager>(F); | |||
1741 | auto *AC = &AM.getResult<AssumptionAnalysis>(F); | |||
1742 | auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); | |||
1743 | auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F) | |||
1744 | : AM.getCachedResult<MemorySSAAnalysis>(F); | |||
1745 | ||||
1746 | bool MadeChange = | |||
1747 | runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); | |||
1748 | if (!MadeChange) | |||
1749 | return PreservedAnalyses::all(); | |||
1750 | ||||
1751 | PreservedAnalyses PA; | |||
1752 | PA.preserveSet<CFGAnalyses>(); | |||
1753 | if (MD) | |||
1754 | PA.preserve<MemoryDependenceAnalysis>(); | |||
1755 | if (MSSA) | |||
1756 | PA.preserve<MemorySSAAnalysis>(); | |||
1757 | return PA; | |||
1758 | } | |||
1759 | ||||
1760 | bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, | |||
1761 | TargetLibraryInfo *TLI_, AliasAnalysis *AA_, | |||
1762 | AssumptionCache *AC_, DominatorTree *DT_, | |||
1763 | MemorySSA *MSSA_) { | |||
1764 | bool MadeChange = false; | |||
1765 | MD = MD_; | |||
1766 | TLI = TLI_; | |||
1767 | AA = AA_; | |||
1768 | AC = AC_; | |||
1769 | DT = DT_; | |||
1770 | MSSA = MSSA_; | |||
1771 | MemorySSAUpdater MSSAU_(MSSA_); | |||
1772 | MSSAU = MSSA_
| |||
1773 | // If we don't have at least memset and memcpy, there is little point of doing | |||
1774 | // anything here. These are required by a freestanding implementation, so if | |||
1775 | // even they are disabled, there is no point in trying hard. | |||
1776 | if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy)) | |||
1777 | return false; | |||
1778 | ||||
1779 | while (true) { | |||
1780 | if (!iterateOnFunction(F)) | |||
1781 | break; | |||
1782 | MadeChange = true; | |||
1783 | } | |||
1784 | ||||
1785 | if (MSSA_ && VerifyMemorySSA) | |||
1786 | MSSA_->verifyMemorySSA(); | |||
1787 | ||||
1788 | MD = nullptr; | |||
1789 | return MadeChange; | |||
1790 | } | |||
1791 | ||||
1792 | /// This is the main transformation entry point for a function. | |||
1793 | bool MemCpyOptLegacyPass::runOnFunction(Function &F) { | |||
1794 | if (skipFunction(F)) | |||
| ||||
1795 | return false; | |||
1796 | ||||
1797 | auto *MDWP = !EnableMemorySSA | |||
1798 | ? &getAnalysis<MemoryDependenceWrapperPass>() | |||
1799 | : getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); | |||
1800 | auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); | |||
1801 | auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
1802 | auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); | |||
1803 | auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
1804 | auto *MSSAWP = EnableMemorySSA | |||
1805 | ? &getAnalysis<MemorySSAWrapperPass>() | |||
1806 | : getAnalysisIfAvailable<MemorySSAWrapperPass>(); | |||
1807 | ||||
1808 | return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, | |||
1809 | MSSAWP
| |||
1810 | } |