File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Analysis/VectorUtils.cpp
Warning: line 1180, column 11: Called C++ object pointer is null
1 | //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===// | ||||||||||
2 | // | ||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||
6 | // | ||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||
8 | // | ||||||||||
9 | // This file defines vectorizer utilities. | ||||||||||
10 | // | ||||||||||
11 | //===----------------------------------------------------------------------===// | ||||||||||
12 | |||||||||||
13 | #include "llvm/Analysis/VectorUtils.h" | ||||||||||
14 | #include "llvm/ADT/EquivalenceClasses.h" | ||||||||||
15 | #include "llvm/Analysis/DemandedBits.h" | ||||||||||
16 | #include "llvm/Analysis/LoopInfo.h" | ||||||||||
17 | #include "llvm/Analysis/LoopIterator.h" | ||||||||||
18 | #include "llvm/Analysis/ScalarEvolution.h" | ||||||||||
19 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||||||
20 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||||
21 | #include "llvm/Analysis/ValueTracking.h" | ||||||||||
22 | #include "llvm/IR/Constants.h" | ||||||||||
23 | #include "llvm/IR/GetElementPtrTypeIterator.h" | ||||||||||
24 | #include "llvm/IR/IRBuilder.h" | ||||||||||
25 | #include "llvm/IR/PatternMatch.h" | ||||||||||
26 | #include "llvm/IR/Value.h" | ||||||||||
27 | #include "llvm/Support/CommandLine.h" | ||||||||||
28 | |||||||||||
29 | #define DEBUG_TYPE "vectorutils" | ||||||||||
30 | |||||||||||
31 | using namespace llvm; | ||||||||||
32 | using namespace llvm::PatternMatch; | ||||||||||
33 | |||||||||||
34 | /// Maximum factor for an interleaved memory access. | ||||||||||
35 | static cl::opt<unsigned> MaxInterleaveGroupFactor( | ||||||||||
36 | "max-interleave-group-factor", cl::Hidden, | ||||||||||
37 | cl::desc("Maximum factor for an interleaved access group (default = 8)"), | ||||||||||
38 | cl::init(8)); | ||||||||||
39 | |||||||||||
40 | /// Return true if all of the intrinsic's arguments and return type are scalars | ||||||||||
41 | /// for the scalar form of the intrinsic, and vectors for the vector form of the | ||||||||||
42 | /// intrinsic (except operands that are marked as always being scalar by | ||||||||||
43 | /// hasVectorInstrinsicScalarOpd). | ||||||||||
44 | bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { | ||||||||||
45 | switch (ID) { | ||||||||||
46 | case Intrinsic::abs: // Begin integer bit-manipulation. | ||||||||||
47 | case Intrinsic::bswap: | ||||||||||
48 | case Intrinsic::bitreverse: | ||||||||||
49 | case Intrinsic::ctpop: | ||||||||||
50 | case Intrinsic::ctlz: | ||||||||||
51 | case Intrinsic::cttz: | ||||||||||
52 | case Intrinsic::fshl: | ||||||||||
53 | case Intrinsic::fshr: | ||||||||||
54 | case Intrinsic::smax: | ||||||||||
55 | case Intrinsic::smin: | ||||||||||
56 | case Intrinsic::umax: | ||||||||||
57 | case Intrinsic::umin: | ||||||||||
58 | case Intrinsic::sadd_sat: | ||||||||||
59 | case Intrinsic::ssub_sat: | ||||||||||
60 | case Intrinsic::uadd_sat: | ||||||||||
61 | case Intrinsic::usub_sat: | ||||||||||
62 | case Intrinsic::smul_fix: | ||||||||||
63 | case Intrinsic::smul_fix_sat: | ||||||||||
64 | case Intrinsic::umul_fix: | ||||||||||
65 | case Intrinsic::umul_fix_sat: | ||||||||||
66 | case Intrinsic::sqrt: // Begin floating-point. | ||||||||||
67 | case Intrinsic::sin: | ||||||||||
68 | case Intrinsic::cos: | ||||||||||
69 | case Intrinsic::exp: | ||||||||||
70 | case Intrinsic::exp2: | ||||||||||
71 | case Intrinsic::log: | ||||||||||
72 | case Intrinsic::log10: | ||||||||||
73 | case Intrinsic::log2: | ||||||||||
74 | case Intrinsic::fabs: | ||||||||||
75 | case Intrinsic::minnum: | ||||||||||
76 | case Intrinsic::maxnum: | ||||||||||
77 | case Intrinsic::minimum: | ||||||||||
78 | case Intrinsic::maximum: | ||||||||||
79 | case Intrinsic::copysign: | ||||||||||
80 | case Intrinsic::floor: | ||||||||||
81 | case Intrinsic::ceil: | ||||||||||
82 | case Intrinsic::trunc: | ||||||||||
83 | case Intrinsic::rint: | ||||||||||
84 | case Intrinsic::nearbyint: | ||||||||||
85 | case Intrinsic::round: | ||||||||||
86 | case Intrinsic::roundeven: | ||||||||||
87 | case Intrinsic::pow: | ||||||||||
88 | case Intrinsic::fma: | ||||||||||
89 | case Intrinsic::fmuladd: | ||||||||||
90 | case Intrinsic::powi: | ||||||||||
91 | case Intrinsic::canonicalize: | ||||||||||
92 | return true; | ||||||||||
93 | default: | ||||||||||
94 | return false; | ||||||||||
95 | } | ||||||||||
96 | } | ||||||||||
97 | |||||||||||
98 | /// Identifies if the vector form of the intrinsic has a scalar operand. | ||||||||||
99 | bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, | ||||||||||
100 | unsigned ScalarOpdIdx) { | ||||||||||
101 | switch (ID) { | ||||||||||
102 | case Intrinsic::abs: | ||||||||||
103 | case Intrinsic::ctlz: | ||||||||||
104 | case Intrinsic::cttz: | ||||||||||
105 | case Intrinsic::powi: | ||||||||||
106 | return (ScalarOpdIdx == 1); | ||||||||||
107 | case Intrinsic::smul_fix: | ||||||||||
108 | case Intrinsic::smul_fix_sat: | ||||||||||
109 | case Intrinsic::umul_fix: | ||||||||||
110 | case Intrinsic::umul_fix_sat: | ||||||||||
111 | return (ScalarOpdIdx == 2); | ||||||||||
112 | default: | ||||||||||
113 | return false; | ||||||||||
114 | } | ||||||||||
115 | } | ||||||||||
116 | |||||||||||
117 | bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, | ||||||||||
118 | unsigned ScalarOpdIdx) { | ||||||||||
119 | switch (ID) { | ||||||||||
120 | case Intrinsic::powi: | ||||||||||
121 | return (ScalarOpdIdx == 1); | ||||||||||
122 | default: | ||||||||||
123 | return false; | ||||||||||
124 | } | ||||||||||
125 | } | ||||||||||
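// Illustrative example: for llvm.powi the exponent operand (index 1) stays scalar
// in the vector form, so hasVectorInstrinsicScalarOpd(Intrinsic::powi, 1) and
// hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::powi, 1) both return true,
// while operand 0 (the base) is widened to a vector.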
126 | |||||||||||
127 | /// Returns intrinsic ID for call. | ||||||||||
128 | /// For the given call instruction, find the corresponding intrinsic and | ||||||||||
129 | /// return its ID; if no mapping exists, return not_intrinsic. | ||||||||||
130 | Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, | ||||||||||
131 | const TargetLibraryInfo *TLI) { | ||||||||||
132 | Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI); | ||||||||||
133 | if (ID == Intrinsic::not_intrinsic) | ||||||||||
134 | return Intrinsic::not_intrinsic; | ||||||||||
135 | |||||||||||
136 | if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || | ||||||||||
137 | ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || | ||||||||||
138 | ID == Intrinsic::experimental_noalias_scope_decl || | ||||||||||
139 | ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) | ||||||||||
140 | return ID; | ||||||||||
141 | return Intrinsic::not_intrinsic; | ||||||||||
142 | } | ||||||||||
143 | |||||||||||
144 | /// Find the operand of the GEP that should be checked for consecutive | ||||||||||
145 | /// stores. This ignores trailing indices that have no effect on the final | ||||||||||
146 | /// pointer. | ||||||||||
147 | unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { | ||||||||||
148 | const DataLayout &DL = Gep->getModule()->getDataLayout(); | ||||||||||
149 | unsigned LastOperand = Gep->getNumOperands() - 1; | ||||||||||
150 | TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType()); | ||||||||||
151 | |||||||||||
152 | // Walk backwards and try to peel off zeros. | ||||||||||
153 | while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { | ||||||||||
154 | // Find the type we're currently indexing into. | ||||||||||
155 | gep_type_iterator GEPTI = gep_type_begin(Gep); | ||||||||||
156 | std::advance(GEPTI, LastOperand - 2); | ||||||||||
157 | |||||||||||
158 | // If it's a type with the same allocation size as the result of the GEP we | ||||||||||
159 | // can peel off the zero index. | ||||||||||
160 | if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize) | ||||||||||
161 | break; | ||||||||||
162 | --LastOperand; | ||||||||||
163 | } | ||||||||||
164 | |||||||||||
165 | return LastOperand; | ||||||||||
166 | } | ||||||||||
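// Illustrative example (hypothetical IR): for
//   %p = getelementptr { double }, { double }* %A, i64 %i, i32 0
// the trailing zero indexes a single-element struct whose alloc size matches that
// of the result element type (double), so it is peeled off and operand 1 (%i) is
// returned as the induction operand; for
//   %q = getelementptr [4 x i32], [4 x i32]* %B, i64 %i, i64 0
// the array's alloc size (16) differs from the element size (4), so the zero is
// kept and operand 2 is returned.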
167 | |||||||||||
168 | /// If the argument is a GEP, then returns the operand identified by | ||||||||||
169 | /// getGEPInductionOperand. However, if there is some other non-loop-invariant | ||||||||||
170 | /// operand, it returns that instead. | ||||||||||
171 | Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { | ||||||||||
172 | GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); | ||||||||||
173 | if (!GEP) | ||||||||||
174 | return Ptr; | ||||||||||
175 | |||||||||||
176 | unsigned InductionOperand = getGEPInductionOperand(GEP); | ||||||||||
177 | |||||||||||
178 | // Check that all of the gep indices are uniform except for our induction | ||||||||||
179 | // operand. | ||||||||||
180 | for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) | ||||||||||
181 | if (i != InductionOperand && | ||||||||||
182 | !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) | ||||||||||
183 | return Ptr; | ||||||||||
184 | return GEP->getOperand(InductionOperand); | ||||||||||
185 | } | ||||||||||
186 | |||||||||||
187 | /// If a value has only one user that is a CastInst, return it. | ||||||||||
188 | Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { | ||||||||||
189 | Value *UniqueCast = nullptr; | ||||||||||
190 | for (User *U : Ptr->users()) { | ||||||||||
191 | CastInst *CI = dyn_cast<CastInst>(U); | ||||||||||
192 | if (CI && CI->getType() == Ty) { | ||||||||||
193 | if (!UniqueCast) | ||||||||||
194 | UniqueCast = CI; | ||||||||||
195 | else | ||||||||||
196 | return nullptr; | ||||||||||
197 | } | ||||||||||
198 | } | ||||||||||
199 | return UniqueCast; | ||||||||||
200 | } | ||||||||||
201 | |||||||||||
202 | /// Get the stride of a pointer access in a loop. Looks for symbolic | ||||||||||
203 | /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. | ||||||||||
204 | Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { | ||||||||||
205 | auto *PtrTy = dyn_cast<PointerType>(Ptr->getType()); | ||||||||||
206 | if (!PtrTy || PtrTy->isAggregateType()) | ||||||||||
207 | return nullptr; | ||||||||||
208 | |||||||||||
209 | // Try to strip away a GEP instruction to make the pointer (actually, the | ||||||||||
210 | // index at this point) easier to analyze. If OrigPtr is equal to Ptr we are | ||||||||||
211 | // analyzing the pointer; otherwise, we are analyzing the index. | ||||||||||
212 | Value *OrigPtr = Ptr; | ||||||||||
213 | |||||||||||
214 | // The size of the pointer access. | ||||||||||
215 | int64_t PtrAccessSize = 1; | ||||||||||
216 | |||||||||||
217 | Ptr = stripGetElementPtr(Ptr, SE, Lp); | ||||||||||
218 | const SCEV *V = SE->getSCEV(Ptr); | ||||||||||
219 | |||||||||||
220 | if (Ptr != OrigPtr) | ||||||||||
221 | // Strip off casts. | ||||||||||
222 | while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) | ||||||||||
223 | V = C->getOperand(); | ||||||||||
224 | |||||||||||
225 | const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); | ||||||||||
226 | if (!S) | ||||||||||
227 | return nullptr; | ||||||||||
228 | |||||||||||
229 | V = S->getStepRecurrence(*SE); | ||||||||||
230 | if (!V) | ||||||||||
231 | return nullptr; | ||||||||||
232 | |||||||||||
233 | // Strip off the size of access multiplication if we are still analyzing the | ||||||||||
234 | // pointer. | ||||||||||
235 | if (OrigPtr == Ptr) { | ||||||||||
236 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { | ||||||||||
237 | if (M->getOperand(0)->getSCEVType() != scConstant) | ||||||||||
238 | return nullptr; | ||||||||||
239 | |||||||||||
240 | const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt(); | ||||||||||
241 | |||||||||||
242 | // Huge step value - give up. | ||||||||||
243 | if (APStepVal.getBitWidth() > 64) | ||||||||||
244 | return nullptr; | ||||||||||
245 | |||||||||||
246 | int64_t StepVal = APStepVal.getSExtValue(); | ||||||||||
247 | if (PtrAccessSize != StepVal) | ||||||||||
248 | return nullptr; | ||||||||||
249 | V = M->getOperand(1); | ||||||||||
250 | } | ||||||||||
251 | } | ||||||||||
252 | |||||||||||
253 | // Strip off casts. | ||||||||||
254 | Type *StripedOffRecurrenceCast = nullptr; | ||||||||||
255 | if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) { | ||||||||||
256 | StripedOffRecurrenceCast = C->getType(); | ||||||||||
257 | V = C->getOperand(); | ||||||||||
258 | } | ||||||||||
259 | |||||||||||
260 | // Look for the loop invariant symbolic value. | ||||||||||
261 | const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V); | ||||||||||
262 | if (!U) | ||||||||||
263 | return nullptr; | ||||||||||
264 | |||||||||||
265 | Value *Stride = U->getValue(); | ||||||||||
266 | if (!Lp->isLoopInvariant(Stride)) | ||||||||||
267 | return nullptr; | ||||||||||
268 | |||||||||||
269 | // If we have stripped off the recurrence cast we have to make sure that we | ||||||||||
270 | // return the value that is used in this loop so that we can replace it later. | ||||||||||
271 | if (StripedOffRecurrenceCast) | ||||||||||
272 | Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast); | ||||||||||
273 | |||||||||||
274 | return Stride; | ||||||||||
275 | } | ||||||||||
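// Illustrative example (hypothetical IR; %A, %stride, %mul are placeholder names):
// for %p = getelementptr double, double* %A, i64 %mul with %mul = i * %stride,
// stripGetElementPtr returns the index %mul; its SCEV is an add recurrence whose
// step is the loop-invariant SCEVUnknown %stride, so %stride (or its unique
// in-loop cast, if one was stripped) is returned as the symbolic stride.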
276 | |||||||||||
277 | /// Given a vector and an element number, see if the scalar value is | ||||||||||
278 | /// already around as a register, for example if it were inserted then extracted | ||||||||||
279 | /// from the vector. | ||||||||||
280 | Value *llvm::findScalarElement(Value *V, unsigned EltNo) { | ||||||||||
281 | assert(V->getType()->isVectorTy() && "Not looking at a vector?"); | ||||||||||
282 | VectorType *VTy = cast<VectorType>(V->getType()); | ||||||||||
283 | // For a fixed-length vector, return undef for an out-of-range access. | ||||||||||
284 | if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { | ||||||||||
285 | unsigned Width = FVTy->getNumElements(); | ||||||||||
286 | if (EltNo >= Width) | ||||||||||
287 | return UndefValue::get(FVTy->getElementType()); | ||||||||||
288 | } | ||||||||||
289 | |||||||||||
290 | if (Constant *C = dyn_cast<Constant>(V)) | ||||||||||
291 | return C->getAggregateElement(EltNo); | ||||||||||
292 | |||||||||||
293 | if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { | ||||||||||
294 | // If this is an insert to a variable element, we don't know what it is. | ||||||||||
295 | if (!isa<ConstantInt>(III->getOperand(2))) | ||||||||||
296 | return nullptr; | ||||||||||
297 | unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); | ||||||||||
298 | |||||||||||
299 | // If this is an insert to the element we are looking for, return the | ||||||||||
300 | // inserted value. | ||||||||||
301 | if (EltNo == IIElt) | ||||||||||
302 | return III->getOperand(1); | ||||||||||
303 | |||||||||||
304 | // Guard against infinite loop on malformed, unreachable IR. | ||||||||||
305 | if (III == III->getOperand(0)) | ||||||||||
306 | return nullptr; | ||||||||||
307 | |||||||||||
308 | // Otherwise, the insertelement doesn't modify the value, recurse on its | ||||||||||
309 | // vector input. | ||||||||||
310 | return findScalarElement(III->getOperand(0), EltNo); | ||||||||||
311 | } | ||||||||||
312 | |||||||||||
313 | ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V); | ||||||||||
314 | // Restrict the following transformation to fixed-length vector. | ||||||||||
315 | if (SVI && isa<FixedVectorType>(SVI->getType())) { | ||||||||||
316 | unsigned LHSWidth = | ||||||||||
317 | cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements(); | ||||||||||
318 | int InEl = SVI->getMaskValue(EltNo); | ||||||||||
319 | if (InEl < 0) | ||||||||||
320 | return UndefValue::get(VTy->getElementType()); | ||||||||||
321 | if (InEl < (int)LHSWidth) | ||||||||||
322 | return findScalarElement(SVI->getOperand(0), InEl); | ||||||||||
323 | return findScalarElement(SVI->getOperand(1), InEl - LHSWidth); | ||||||||||
324 | } | ||||||||||
325 | |||||||||||
326 | // Extract a value from a vector add operation with a constant zero. | ||||||||||
327 | // TODO: Use getBinOpIdentity() to generalize this. | ||||||||||
328 | Value *Val; Constant *C; | ||||||||||
329 | if (match(V, m_Add(m_Value(Val), m_Constant(C)))) | ||||||||||
330 | if (Constant *Elt = C->getAggregateElement(EltNo)) | ||||||||||
331 | if (Elt->isNullValue()) | ||||||||||
332 | return findScalarElement(Val, EltNo); | ||||||||||
333 | |||||||||||
334 | // Otherwise, we don't know. | ||||||||||
335 | return nullptr; | ||||||||||
336 | } | ||||||||||
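// Illustrative example (hypothetical IR): given
//   %v = insertelement <4 x float> %w, float %x, i32 2
// findScalarElement(%v, 2) returns %x, findScalarElement(%v, 0) recurses into %w,
// and an out-of-range index such as 7 yields undef for this fixed-length type.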
337 | |||||||||||
338 | int llvm::getSplatIndex(ArrayRef<int> Mask) { | ||||||||||
339 | int SplatIndex = -1; | ||||||||||
340 | for (int M : Mask) { | ||||||||||
341 | // Ignore invalid (undefined) mask elements. | ||||||||||
342 | if (M < 0) | ||||||||||
343 | continue; | ||||||||||
344 | |||||||||||
345 | // There can be only 1 non-negative mask element value if this is a splat. | ||||||||||
346 | if (SplatIndex != -1 && SplatIndex != M) | ||||||||||
347 | return -1; | ||||||||||
348 | |||||||||||
349 | // Initialize the splat index to the 1st non-negative mask element. | ||||||||||
350 | SplatIndex = M; | ||||||||||
351 | } | ||||||||||
352 | assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?"); | ||||||||||
353 | return SplatIndex; | ||||||||||
354 | } | ||||||||||
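// Illustrative example: getSplatIndex({3, -1, 3, 3}) returns 3 because all defined
// mask elements agree, while getSplatIndex({0, 1, 0, 1}) returns -1.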
355 | |||||||||||
356 | /// Get splat value if the input is a splat vector or return nullptr. | ||||||||||
357 | /// This function is not fully general. It checks only 2 cases: | ||||||||||
358 | /// the input value is (1) a splat constant vector or (2) a sequence | ||||||||||
359 | /// of instructions that broadcasts a scalar at element 0. | ||||||||||
360 | Value *llvm::getSplatValue(const Value *V) { | ||||||||||
361 | if (isa<VectorType>(V->getType())) | ||||||||||
362 | if (auto *C = dyn_cast<Constant>(V)) | ||||||||||
363 | return C->getSplatValue(); | ||||||||||
364 | |||||||||||
365 | // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...> | ||||||||||
366 | Value *Splat; | ||||||||||
367 | if (match(V, | ||||||||||
368 | m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()), | ||||||||||
369 | m_Value(), m_ZeroMask()))) | ||||||||||
370 | return Splat; | ||||||||||
371 | |||||||||||
372 | return nullptr; | ||||||||||
373 | } | ||||||||||
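// Illustrative example (hypothetical IR): the common broadcast idiom
//   %ins   = insertelement <4 x i32> undef, i32 %s, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
// matches the shuffle pattern above, so getSplatValue(%splat) returns %s.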
374 | |||||||||||
375 | bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) { | ||||||||||
376 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); | ||||||||||
377 | |||||||||||
378 | if (isa<VectorType>(V->getType())) { | ||||||||||
379 | if (isa<UndefValue>(V)) | ||||||||||
380 | return true; | ||||||||||
381 | // FIXME: We can allow undefs, but if Index was specified, we may want to | ||||||||||
382 | // check that the constant is defined at that index. | ||||||||||
383 | if (auto *C = dyn_cast<Constant>(V)) | ||||||||||
384 | return C->getSplatValue() != nullptr; | ||||||||||
385 | } | ||||||||||
386 | |||||||||||
387 | if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) { | ||||||||||
388 | // FIXME: We can safely allow undefs here. If Index was specified, we will | ||||||||||
389 | // check that the mask elt is defined at the required index. | ||||||||||
390 | if (!is_splat(Shuf->getShuffleMask())) | ||||||||||
391 | return false; | ||||||||||
392 | |||||||||||
393 | // Match any index. | ||||||||||
394 | if (Index == -1) | ||||||||||
395 | return true; | ||||||||||
396 | |||||||||||
397 | // Match a specific element. The mask should be defined at and match the | ||||||||||
398 | // specified index. | ||||||||||
399 | return Shuf->getMaskValue(Index) == Index; | ||||||||||
400 | } | ||||||||||
401 | |||||||||||
402 | // The remaining tests are all recursive, so bail out if we hit the limit. | ||||||||||
403 | if (Depth++ == MaxAnalysisRecursionDepth) | ||||||||||
404 | return false; | ||||||||||
405 | |||||||||||
406 | // If both operands of a binop are splats, the result is a splat. | ||||||||||
407 | Value *X, *Y, *Z; | ||||||||||
408 | if (match(V, m_BinOp(m_Value(X), m_Value(Y)))) | ||||||||||
409 | return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth); | ||||||||||
410 | |||||||||||
411 | // If all operands of a select are splats, the result is a splat. | ||||||||||
412 | if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z)))) | ||||||||||
413 | return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) && | ||||||||||
414 | isSplatValue(Z, Index, Depth); | ||||||||||
415 | |||||||||||
416 | // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops). | ||||||||||
417 | |||||||||||
418 | return false; | ||||||||||
419 | } | ||||||||||
420 | |||||||||||
421 | void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, | ||||||||||
422 | SmallVectorImpl<int> &ScaledMask) { | ||||||||||
423 | assert(Scale > 0 && "Unexpected scaling factor"); | ||||||||||
424 | |||||||||||
425 | // Fast-path: if no scaling, then it is just a copy. | ||||||||||
426 | if (Scale == 1) { | ||||||||||
427 | ScaledMask.assign(Mask.begin(), Mask.end()); | ||||||||||
428 | return; | ||||||||||
429 | } | ||||||||||
430 | |||||||||||
431 | ScaledMask.clear(); | ||||||||||
432 | for (int MaskElt : Mask) { | ||||||||||
433 | if (MaskElt >= 0) { | ||||||||||
434 | assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX && | ||||||||||
435 | "Overflowed 32-bits"); | ||||||||||
436 | } | ||||||||||
437 | for (int SliceElt = 0; SliceElt != Scale; ++SliceElt) | ||||||||||
438 | ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt); | ||||||||||
439 | } | ||||||||||
440 | } | ||||||||||
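// Illustrative example: narrowShuffleMaskElts(2, {0, 2, -1}, ScaledMask) produces
// {0, 1, 4, 5, -1, -1}: each defined element M expands to the Scale consecutive
// narrow elements starting at Scale * M, and sentinel values are replicated.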
441 | |||||||||||
442 | bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask, | ||||||||||
443 | SmallVectorImpl<int> &ScaledMask) { | ||||||||||
444 | assert(Scale > 0 && "Unexpected scaling factor"); | ||||||||||
445 | |||||||||||
446 | // Fast-path: if no scaling, then it is just a copy. | ||||||||||
447 | if (Scale == 1) { | ||||||||||
448 | ScaledMask.assign(Mask.begin(), Mask.end()); | ||||||||||
449 | return true; | ||||||||||
450 | } | ||||||||||
451 | |||||||||||
452 | // We must map the original elements down evenly to a type with fewer elements. | ||||||||||
453 | int NumElts = Mask.size(); | ||||||||||
454 | if (NumElts % Scale != 0) | ||||||||||
455 | return false; | ||||||||||
456 | |||||||||||
457 | ScaledMask.clear(); | ||||||||||
458 | ScaledMask.reserve(NumElts / Scale); | ||||||||||
459 | |||||||||||
460 | // Step through the input mask by splitting into Scale-sized slices. | ||||||||||
461 | do { | ||||||||||
462 | ArrayRef<int> MaskSlice = Mask.take_front(Scale); | ||||||||||
463 | assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice."); | ||||||||||
464 | |||||||||||
465 | // The first element of the slice determines how we evaluate this slice. | ||||||||||
466 | int SliceFront = MaskSlice.front(); | ||||||||||
467 | if (SliceFront < 0) { | ||||||||||
468 | // Negative values (undef or other "sentinel" values) must be equal across | ||||||||||
469 | // the entire slice. | ||||||||||
470 | if (!is_splat(MaskSlice)) | ||||||||||
471 | return false; | ||||||||||
472 | ScaledMask.push_back(SliceFront); | ||||||||||
473 | } else { | ||||||||||
474 | // A positive mask element must be cleanly divisible. | ||||||||||
475 | if (SliceFront % Scale != 0) | ||||||||||
476 | return false; | ||||||||||
477 | // Elements of the slice must be consecutive. | ||||||||||
478 | for (int i = 1; i < Scale; ++i) | ||||||||||
479 | if (MaskSlice[i] != SliceFront + i) | ||||||||||
480 | return false; | ||||||||||
481 | ScaledMask.push_back(SliceFront / Scale); | ||||||||||
482 | } | ||||||||||
483 | Mask = Mask.drop_front(Scale); | ||||||||||
484 | } while (!Mask.empty()); | ||||||||||
485 | |||||||||||
486 | assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask"); | ||||||||||
487 | |||||||||||
488 | // All elements of the original mask can be scaled down to map to the elements | ||||||||||
489 | // of a mask with wider elements. | ||||||||||
490 | return true; | ||||||||||
491 | } | ||||||||||
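// Illustrative example: widenShuffleMaskElts(2, {0, 1, 6, 7}, ScaledMask) succeeds
// with {0, 3}, whereas {0, 2, 4, 6} fails because the slice {0, 2} is not a run of
// consecutive elements.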
492 | |||||||||||
493 | MapVector<Instruction *, uint64_t> | ||||||||||
494 | llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB, | ||||||||||
495 | const TargetTransformInfo *TTI) { | ||||||||||
496 | |||||||||||
497 | // DemandedBits will give us every value's live-out bits. But we want | ||||||||||
498 | // to ensure no extra casts would need to be inserted, so every DAG | ||||||||||
499 | // of connected values must have the same minimum bitwidth. | ||||||||||
500 | EquivalenceClasses<Value *> ECs; | ||||||||||
501 | SmallVector<Value *, 16> Worklist; | ||||||||||
502 | SmallPtrSet<Value *, 4> Roots; | ||||||||||
503 | SmallPtrSet<Value *, 16> Visited; | ||||||||||
504 | DenseMap<Value *, uint64_t> DBits; | ||||||||||
505 | SmallPtrSet<Instruction *, 4> InstructionSet; | ||||||||||
506 | MapVector<Instruction *, uint64_t> MinBWs; | ||||||||||
507 | |||||||||||
508 | // Determine the roots. We work bottom-up, from truncs or icmps. | ||||||||||
509 | bool SeenExtFromIllegalType = false; | ||||||||||
510 | for (auto *BB : Blocks) | ||||||||||
511 | for (auto &I : *BB) { | ||||||||||
512 | InstructionSet.insert(&I); | ||||||||||
513 | |||||||||||
514 | if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) && | ||||||||||
515 | !TTI->isTypeLegal(I.getOperand(0)->getType())) | ||||||||||
516 | SeenExtFromIllegalType = true; | ||||||||||
517 | |||||||||||
518 | // Only deal with non-vector integers up to 64-bits wide. | ||||||||||
519 | if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) && | ||||||||||
520 | !I.getType()->isVectorTy() && | ||||||||||
521 | I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) { | ||||||||||
522 | // Don't make work for ourselves. If we know the loaded type is legal, | ||||||||||
523 | // don't add it to the worklist. | ||||||||||
524 | if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType())) | ||||||||||
525 | continue; | ||||||||||
526 | |||||||||||
527 | Worklist.push_back(&I); | ||||||||||
528 | Roots.insert(&I); | ||||||||||
529 | } | ||||||||||
530 | } | ||||||||||
531 | // Early exit. | ||||||||||
532 | if (Worklist.empty() || (TTI && !SeenExtFromIllegalType)) | ||||||||||
533 | return MinBWs; | ||||||||||
534 | |||||||||||
535 | // Now proceed breadth-first, unioning values together. | ||||||||||
536 | while (!Worklist.empty()) { | ||||||||||
537 | Value *Val = Worklist.pop_back_val(); | ||||||||||
538 | Value *Leader = ECs.getOrInsertLeaderValue(Val); | ||||||||||
539 | |||||||||||
540 | if (Visited.count(Val)) | ||||||||||
541 | continue; | ||||||||||
542 | Visited.insert(Val); | ||||||||||
543 | |||||||||||
544 | // Non-instructions terminate a chain successfully. | ||||||||||
545 | if (!isa<Instruction>(Val)) | ||||||||||
546 | continue; | ||||||||||
547 | Instruction *I = cast<Instruction>(Val); | ||||||||||
548 | |||||||||||
549 | // If we encounter a type that is larger than 64 bits, we can't represent | ||||||||||
550 | // it so bail out. | ||||||||||
551 | if (DB.getDemandedBits(I).getBitWidth() > 64) | ||||||||||
552 | return MapVector<Instruction *, uint64_t>(); | ||||||||||
553 | |||||||||||
554 | uint64_t V = DB.getDemandedBits(I).getZExtValue(); | ||||||||||
555 | DBits[Leader] |= V; | ||||||||||
556 | DBits[I] = V; | ||||||||||
557 | |||||||||||
558 | // Casts, loads and instructions outside of our range terminate a chain | ||||||||||
559 | // successfully. | ||||||||||
560 | if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) || | ||||||||||
561 | !InstructionSet.count(I)) | ||||||||||
562 | continue; | ||||||||||
563 | |||||||||||
564 | // Unsafe casts terminate a chain unsuccessfully. We can't do anything | ||||||||||
565 | // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to | ||||||||||
566 | // transform anything that relies on them. | ||||||||||
567 | if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) || | ||||||||||
568 | !I->getType()->isIntegerTy()) { | ||||||||||
569 | DBits[Leader] |= ~0ULL; | ||||||||||
570 | continue; | ||||||||||
571 | } | ||||||||||
572 | |||||||||||
573 | // We don't modify the types of PHIs. Reductions will already have been | ||||||||||
574 | // truncated if possible, and inductions' sizes will have been chosen by | ||||||||||
575 | // indvars. | ||||||||||
576 | if (isa<PHINode>(I)) | ||||||||||
577 | continue; | ||||||||||
578 | |||||||||||
579 | if (DBits[Leader] == ~0ULL) | ||||||||||
580 | // All bits demanded, no point continuing. | ||||||||||
581 | continue; | ||||||||||
582 | |||||||||||
583 | for (Value *O : cast<User>(I)->operands()) { | ||||||||||
584 | ECs.unionSets(Leader, O); | ||||||||||
585 | Worklist.push_back(O); | ||||||||||
586 | } | ||||||||||
587 | } | ||||||||||
588 | |||||||||||
589 | // Now we've discovered all values, walk them to see if there are | ||||||||||
590 | // any users we didn't see. If there are, we can't optimize that | ||||||||||
591 | // chain. | ||||||||||
592 | for (auto &I : DBits) | ||||||||||
593 | for (auto *U : I.first->users()) | ||||||||||
594 | if (U->getType()->isIntegerTy() && DBits.count(U) == 0) | ||||||||||
595 | DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL; | ||||||||||
596 | |||||||||||
597 | for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) { | ||||||||||
598 | uint64_t LeaderDemandedBits = 0; | ||||||||||
599 | for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) | ||||||||||
600 | LeaderDemandedBits |= DBits[M]; | ||||||||||
601 | |||||||||||
602 | uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) - | ||||||||||
603 | llvm::countLeadingZeros(LeaderDemandedBits); | ||||||||||
604 | // Round up to a power of 2 | ||||||||||
605 | if (!isPowerOf2_64((uint64_t)MinBW)) | ||||||||||
606 | MinBW = NextPowerOf2(MinBW); | ||||||||||
607 | |||||||||||
608 | // We don't modify the types of PHIs. Reductions will already have been | ||||||||||
609 | // truncated if possible, and inductions' sizes will have been chosen by | ||||||||||
610 | // indvars. | ||||||||||
611 | // If we are required to shrink a PHI, abandon this entire equivalence class. | ||||||||||
612 | bool Abort = false; | ||||||||||
613 | for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) | ||||||||||
614 | if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) { | ||||||||||
615 | Abort = true; | ||||||||||
616 | break; | ||||||||||
617 | } | ||||||||||
618 | if (Abort) | ||||||||||
619 | continue; | ||||||||||
620 | |||||||||||
621 | for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) { | ||||||||||
622 | if (!isa<Instruction>(M)) | ||||||||||
623 | continue; | ||||||||||
624 | Type *Ty = M->getType(); | ||||||||||
625 | if (Roots.count(M)) | ||||||||||
626 | Ty = cast<Instruction>(M)->getOperand(0)->getType(); | ||||||||||
627 | if (MinBW < Ty->getScalarSizeInBits()) | ||||||||||
628 | MinBWs[cast<Instruction>(M)] = MinBW; | ||||||||||
629 | } | ||||||||||
630 | } | ||||||||||
631 | |||||||||||
632 | return MinBWs; | ||||||||||
633 | } | ||||||||||
634 | |||||||||||
635 | /// Add all access groups in @p AccGroups to @p List. | ||||||||||
636 | template <typename ListT> | ||||||||||
637 | static void addToAccessGroupList(ListT &List, MDNode *AccGroups) { | ||||||||||
638 | // Interpret an access group as a list containing itself. | ||||||||||
639 | if (AccGroups->getNumOperands() == 0) { | ||||||||||
640 | assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group"); | ||||||||||
641 | List.insert(AccGroups); | ||||||||||
642 | return; | ||||||||||
643 | } | ||||||||||
644 | |||||||||||
645 | for (auto &AccGroupListOp : AccGroups->operands()) { | ||||||||||
646 | auto *Item = cast<MDNode>(AccGroupListOp.get()); | ||||||||||
647 | assert(isValidAsAccessGroup(Item) && "List item must be an access group"); | ||||||||||
648 | List.insert(Item); | ||||||||||
649 | } | ||||||||||
650 | } | ||||||||||
651 | |||||||||||
652 | MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) { | ||||||||||
653 | if (!AccGroups1) | ||||||||||
654 | return AccGroups2; | ||||||||||
655 | if (!AccGroups2) | ||||||||||
656 | return AccGroups1; | ||||||||||
657 | if (AccGroups1 == AccGroups2) | ||||||||||
658 | return AccGroups1; | ||||||||||
659 | |||||||||||
660 | SmallSetVector<Metadata *, 4> Union; | ||||||||||
661 | addToAccessGroupList(Union, AccGroups1); | ||||||||||
662 | addToAccessGroupList(Union, AccGroups2); | ||||||||||
663 | |||||||||||
664 | if (Union.size() == 0) | ||||||||||
665 | return nullptr; | ||||||||||
666 | if (Union.size() == 1) | ||||||||||
667 | return cast<MDNode>(Union.front()); | ||||||||||
668 | |||||||||||
669 | LLVMContext &Ctx = AccGroups1->getContext(); | ||||||||||
670 | return MDNode::get(Ctx, Union.getArrayRef()); | ||||||||||
671 | } | ||||||||||
672 | |||||||||||
673 | MDNode *llvm::intersectAccessGroups(const Instruction *Inst1, | ||||||||||
674 | const Instruction *Inst2) { | ||||||||||
675 | bool MayAccessMem1 = Inst1->mayReadOrWriteMemory(); | ||||||||||
676 | bool MayAccessMem2 = Inst2->mayReadOrWriteMemory(); | ||||||||||
677 | |||||||||||
678 | if (!MayAccessMem1 && !MayAccessMem2) | ||||||||||
679 | return nullptr; | ||||||||||
680 | if (!MayAccessMem1) | ||||||||||
681 | return Inst2->getMetadata(LLVMContext::MD_access_group); | ||||||||||
682 | if (!MayAccessMem2) | ||||||||||
683 | return Inst1->getMetadata(LLVMContext::MD_access_group); | ||||||||||
684 | |||||||||||
685 | MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group); | ||||||||||
686 | MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group); | ||||||||||
687 | if (!MD1 || !MD2) | ||||||||||
688 | return nullptr; | ||||||||||
689 | if (MD1 == MD2) | ||||||||||
690 | return MD1; | ||||||||||
691 | |||||||||||
692 | // Use set for scalable 'contains' check. | ||||||||||
693 | SmallPtrSet<Metadata *, 4> AccGroupSet2; | ||||||||||
694 | addToAccessGroupList(AccGroupSet2, MD2); | ||||||||||
695 | |||||||||||
696 | SmallVector<Metadata *, 4> Intersection; | ||||||||||
697 | if (MD1->getNumOperands() == 0) { | ||||||||||
698 | assert(isValidAsAccessGroup(MD1) && "Node must be an access group"); | ||||||||||
699 | if (AccGroupSet2.count(MD1)) | ||||||||||
700 | Intersection.push_back(MD1); | ||||||||||
701 | } else { | ||||||||||
702 | for (const MDOperand &Node : MD1->operands()) { | ||||||||||
703 | auto *Item = cast<MDNode>(Node.get()); | ||||||||||
704 | assert(isValidAsAccessGroup(Item) && "List item must be an access group"); | ||||||||||
705 | if (AccGroupSet2.count(Item)) | ||||||||||
706 | Intersection.push_back(Item); | ||||||||||
707 | } | ||||||||||
708 | } | ||||||||||
709 | |||||||||||
710 | if (Intersection.size() == 0) | ||||||||||
711 | return nullptr; | ||||||||||
712 | if (Intersection.size() == 1) | ||||||||||
713 | return cast<MDNode>(Intersection.front()); | ||||||||||
714 | |||||||||||
715 | LLVMContext &Ctx = Inst1->getContext(); | ||||||||||
716 | return MDNode::get(Ctx, Intersection); | ||||||||||
717 | } | ||||||||||
718 | |||||||||||
719 | /// \returns \p I after propagating metadata from \p VL. | ||||||||||
720 | Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) { | ||||||||||
721 | if (VL.empty()) | ||||||||||
722 | return Inst; | ||||||||||
723 | Instruction *I0 = cast<Instruction>(VL[0]); | ||||||||||
724 | SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; | ||||||||||
725 | I0->getAllMetadataOtherThanDebugLoc(Metadata); | ||||||||||
726 | |||||||||||
727 | for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, | ||||||||||
728 | LLVMContext::MD_noalias, LLVMContext::MD_fpmath, | ||||||||||
729 | LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load, | ||||||||||
730 | LLVMContext::MD_access_group}) { | ||||||||||
731 | MDNode *MD = I0->getMetadata(Kind); | ||||||||||
732 | |||||||||||
733 | for (int J = 1, E = VL.size(); MD && J != E; ++J) { | ||||||||||
734 | const Instruction *IJ = cast<Instruction>(VL[J]); | ||||||||||
735 | MDNode *IMD = IJ->getMetadata(Kind); | ||||||||||
736 | switch (Kind) { | ||||||||||
737 | case LLVMContext::MD_tbaa: | ||||||||||
738 | MD = MDNode::getMostGenericTBAA(MD, IMD); | ||||||||||
739 | break; | ||||||||||
740 | case LLVMContext::MD_alias_scope: | ||||||||||
741 | MD = MDNode::getMostGenericAliasScope(MD, IMD); | ||||||||||
742 | break; | ||||||||||
743 | case LLVMContext::MD_fpmath: | ||||||||||
744 | MD = MDNode::getMostGenericFPMath(MD, IMD); | ||||||||||
745 | break; | ||||||||||
746 | case LLVMContext::MD_noalias: | ||||||||||
747 | case LLVMContext::MD_nontemporal: | ||||||||||
748 | case LLVMContext::MD_invariant_load: | ||||||||||
749 | MD = MDNode::intersect(MD, IMD); | ||||||||||
750 | break; | ||||||||||
751 | case LLVMContext::MD_access_group: | ||||||||||
752 | MD = intersectAccessGroups(Inst, IJ); | ||||||||||
753 | break; | ||||||||||
754 | default: | ||||||||||
755 | llvm_unreachable("unhandled metadata"); | ||||||||||
756 | } | ||||||||||
757 | } | ||||||||||
758 | |||||||||||
759 | Inst->setMetadata(Kind, MD); | ||||||||||
760 | } | ||||||||||
761 | |||||||||||
762 | return Inst; | ||||||||||
763 | } | ||||||||||
764 | |||||||||||
765 | Constant * | ||||||||||
766 | llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, | ||||||||||
767 | const InterleaveGroup<Instruction> &Group) { | ||||||||||
768 | // All 1's means mask is not needed. | ||||||||||
769 | if (Group.getNumMembers() == Group.getFactor()) | ||||||||||
770 | return nullptr; | ||||||||||
771 | |||||||||||
772 | // TODO: support reversed access. | ||||||||||
773 | assert(!Group.isReverse() && "Reversed group not supported."); | ||||||||||
774 | |||||||||||
775 | SmallVector<Constant *, 16> Mask; | ||||||||||
776 | for (unsigned i = 0; i < VF; i++) | ||||||||||
777 | for (unsigned j = 0; j < Group.getFactor(); ++j) { | ||||||||||
778 | unsigned HasMember = Group.getMember(j) ? 1 : 0; | ||||||||||
779 | Mask.push_back(Builder.getInt1(HasMember)); | ||||||||||
780 | } | ||||||||||
781 | |||||||||||
782 | return ConstantVector::get(Mask); | ||||||||||
783 | } | ||||||||||
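// Illustrative example (hypothetical group): for an interleave group of factor 3
// whose members occupy indices 0 and 2 (index 1 is a gap) and VF = 2, the returned
// mask is <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>.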
784 | |||||||||||
785 | llvm::SmallVector<int, 16> | ||||||||||
786 | llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) { | ||||||||||
787 | SmallVector<int, 16> MaskVec; | ||||||||||
788 | for (unsigned i = 0; i < VF; i++) | ||||||||||
789 | for (unsigned j = 0; j < ReplicationFactor; j++) | ||||||||||
790 | MaskVec.push_back(i); | ||||||||||
791 | |||||||||||
792 | return MaskVec; | ||||||||||
793 | } | ||||||||||
794 | |||||||||||
795 | llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF, | ||||||||||
796 | unsigned NumVecs) { | ||||||||||
797 | SmallVector<int, 16> Mask; | ||||||||||
798 | for (unsigned i = 0; i < VF; i++) | ||||||||||
799 | for (unsigned j = 0; j < NumVecs; j++) | ||||||||||
800 | Mask.push_back(j * VF + i); | ||||||||||
801 | |||||||||||
802 | return Mask; | ||||||||||
803 | } | ||||||||||
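// Illustrative example: createInterleaveMask(4, 2) returns <0, 4, 1, 5, 2, 6, 3, 7>,
// which picks lane i of each of the two concatenated input vectors in turn.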
804 | |||||||||||
805 | llvm::SmallVector<int, 16> | ||||||||||
806 | llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) { | ||||||||||
807 | SmallVector<int, 16> Mask; | ||||||||||
808 | for (unsigned i = 0; i < VF; i++) | ||||||||||
809 | Mask.push_back(Start + i * Stride); | ||||||||||
810 | |||||||||||
811 | return Mask; | ||||||||||
812 | } | ||||||||||
813 | |||||||||||
814 | llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start, | ||||||||||
815 | unsigned NumInts, | ||||||||||
816 | unsigned NumUndefs) { | ||||||||||
817 | SmallVector<int, 16> Mask; | ||||||||||
818 | for (unsigned i = 0; i < NumInts; i++) | ||||||||||
819 | Mask.push_back(Start + i); | ||||||||||
820 | |||||||||||
821 | for (unsigned i = 0; i < NumUndefs; i++) | ||||||||||
822 | Mask.push_back(-1); | ||||||||||
823 | |||||||||||
824 | return Mask; | ||||||||||
825 | } | ||||||||||
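// Illustrative example: createSequentialMask(0, 4, 2) returns <0, 1, 2, 3, -1, -1>;
// the trailing -1 (undef) lanes are how concatenateTwoVectors below pads a narrower
// second operand.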
826 | |||||||||||
827 | /// A helper function for concatenating vectors. This function concatenates two | ||||||||||
828 | /// vectors having the same element type. If the second vector has fewer | ||||||||||
829 | /// elements than the first, it is padded with undefs. | ||||||||||
830 | static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, | ||||||||||
831 | Value *V2) { | ||||||||||
832 | VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType()); | ||||||||||
833 | VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType()); | ||||||||||
834 | assert(VecTy1 && VecTy2 && | ||||||||||
835 | VecTy1->getScalarType() == VecTy2->getScalarType() && | ||||||||||
836 | "Expect two vectors with the same element type"); | ||||||||||
837 | |||||||||||
838 | unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements(); | ||||||||||
839 | unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements(); | ||||||||||
840 | assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); | ||||||||||
841 | |||||||||||
842 | if (NumElts1 > NumElts2) { | ||||||||||
843 | // Extend with UNDEFs. | ||||||||||
844 | V2 = Builder.CreateShuffleVector( | ||||||||||
845 | V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2)); | ||||||||||
846 | } | ||||||||||
847 | |||||||||||
848 | return Builder.CreateShuffleVector( | ||||||||||
849 | V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0)); | ||||||||||
850 | } | ||||||||||
851 | |||||||||||
852 | Value *llvm::concatenateVectors(IRBuilderBase &Builder, | ||||||||||
853 | ArrayRef<Value *> Vecs) { | ||||||||||
854 | unsigned NumVecs = Vecs.size(); | ||||||||||
855 | assert(NumVecs > 1 && "Should be at least two vectors"); | ||||||||||
856 | |||||||||||
857 | SmallVector<Value *, 8> ResList; | ||||||||||
858 | ResList.append(Vecs.begin(), Vecs.end()); | ||||||||||
859 | do { | ||||||||||
860 | SmallVector<Value *, 8> TmpList; | ||||||||||
861 | for (unsigned i = 0; i < NumVecs - 1; i += 2) { | ||||||||||
862 | Value *V0 = ResList[i], *V1 = ResList[i + 1]; | ||||||||||
863 | assert((V0->getType() == V1->getType() || i == NumVecs - 2) && | ||||||||||
864 | "Only the last vector may have a different type"); | ||||||||||
865 | |||||||||||
866 | TmpList.push_back(concatenateTwoVectors(Builder, V0, V1)); | ||||||||||
867 | } | ||||||||||
868 | |||||||||||
869 | // Push the last vector if the total number of vectors is odd. | ||||||||||
870 | if (NumVecs % 2 != 0) | ||||||||||
871 | TmpList.push_back(ResList[NumVecs - 1]); | ||||||||||
872 | |||||||||||
873 | ResList = TmpList; | ||||||||||
874 | NumVecs = ResList.size(); | ||||||||||
875 | } while (NumVecs > 1); | ||||||||||
876 | |||||||||||
877 | return ResList[0]; | ||||||||||
878 | } | ||||||||||
879 | |||||||||||
880 | bool llvm::maskIsAllZeroOrUndef(Value *Mask) { | ||||||||||
881 | assert(isa<VectorType>(Mask->getType()) && | ||||||||||
882 | isa<IntegerType>(Mask->getType()->getScalarType()) && | ||||||||||
883 | cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == | ||||||||||
884 | 1 && | ||||||||||
885 | "Mask must be a vector of i1"); | ||||||||||
886 | |||||||||||
887 | auto *ConstMask = dyn_cast<Constant>(Mask); | ||||||||||
888 | if (!ConstMask) | ||||||||||
889 | return false; | ||||||||||
890 | if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask)) | ||||||||||
891 | return true; | ||||||||||
892 | if (isa<ScalableVectorType>(ConstMask->getType())) | ||||||||||
893 | return false; | ||||||||||
894 | for (unsigned | ||||||||||
895 | I = 0, | ||||||||||
896 | E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); | ||||||||||
897 | I != E; ++I) { | ||||||||||
898 | if (auto *MaskElt = ConstMask->getAggregateElement(I)) | ||||||||||
899 | if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt)) | ||||||||||
900 | continue; | ||||||||||
901 | return false; | ||||||||||
902 | } | ||||||||||
903 | return true; | ||||||||||
904 | } | ||||||||||
905 | |||||||||||
906 | bool llvm::maskIsAllOneOrUndef(Value *Mask) { | ||||||||||
907 | assert(isa<VectorType>(Mask->getType()) && | ||||||||||
908 | isa<IntegerType>(Mask->getType()->getScalarType()) && | ||||||||||
909 | cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == | ||||||||||
910 | 1 && | ||||||||||
911 | "Mask must be a vector of i1"); | ||||||||||
912 | |||||||||||
913 | auto *ConstMask = dyn_cast<Constant>(Mask); | ||||||||||
914 | if (!ConstMask) | ||||||||||
915 | return false; | ||||||||||
916 | if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) | ||||||||||
917 | return true; | ||||||||||
918 | if (isa<ScalableVectorType>(ConstMask->getType())) | ||||||||||
919 | return false; | ||||||||||
920 | for (unsigned | ||||||||||
921 | I = 0, | ||||||||||
922 | E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); | ||||||||||
923 | I != E; ++I) { | ||||||||||
924 | if (auto *MaskElt = ConstMask->getAggregateElement(I)) | ||||||||||
925 | if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) | ||||||||||
926 | continue; | ||||||||||
927 | return false; | ||||||||||
928 | } | ||||||||||
929 | return true; | ||||||||||
930 | } | ||||||||||
931 | |||||||||||
932 | /// TODO: This is a lot like known bits, but for | ||||||||||
933 | /// vectors. Is there something we can common this with? | ||||||||||
934 | APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { | ||||||||||
935 | assert(isa<FixedVectorType>(Mask->getType()) && | ||||||||||
936 | isa<IntegerType>(Mask->getType()->getScalarType()) && | ||||||||||
937 | cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == | ||||||||||
938 | 1 && | ||||||||||
939 | "Mask must be a fixed width vector of i1"); | ||||||||||
940 | |||||||||||
941 | const unsigned VWidth = | ||||||||||
942 | cast<FixedVectorType>(Mask->getType())->getNumElements(); | ||||||||||
943 | APInt DemandedElts = APInt::getAllOnesValue(VWidth); | ||||||||||
944 | if (auto *CV = dyn_cast<ConstantVector>(Mask)) | ||||||||||
945 | for (unsigned i = 0; i < VWidth; i++) | ||||||||||
946 | if (CV->getAggregateElement(i)->isNullValue()) | ||||||||||
947 | DemandedElts.clearBit(i); | ||||||||||
948 | return DemandedElts; | ||||||||||
949 | } | ||||||||||
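// Illustrative example: for the constant mask <i1 1, i1 0, i1 1, i1 1> the result
// is 0b1101 (bit 1 cleared); any non-constant mask conservatively demands all lanes.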
950 | |||||||||||
951 | bool InterleavedAccessInfo::isStrided(int Stride) { | ||||||||||
952 | unsigned Factor = std::abs(Stride); | ||||||||||
953 | return Factor >= 2 && Factor <= MaxInterleaveGroupFactor; | ||||||||||
954 | } | ||||||||||
955 | |||||||||||
956 | void InterleavedAccessInfo::collectConstStrideAccesses( | ||||||||||
957 | MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo, | ||||||||||
958 | const ValueToValueMap &Strides) { | ||||||||||
959 | auto &DL = TheLoop->getHeader()->getModule()->getDataLayout(); | ||||||||||
960 | |||||||||||
961 | // Since it's desired that the load/store instructions be maintained in | ||||||||||
962 | // "program order" for the interleaved access analysis, we have to visit the | ||||||||||
963 | // blocks in the loop in reverse postorder (i.e., in a topological order). | ||||||||||
964 | // Such an ordering will ensure that any load/store that may be executed | ||||||||||
965 | // before a second load/store will precede the second load/store in | ||||||||||
966 | // AccessStrideInfo. | ||||||||||
967 | LoopBlocksDFS DFS(TheLoop); | ||||||||||
968 | DFS.perform(LI); | ||||||||||
969 | for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) | ||||||||||
970 | for (auto &I : *BB) { | ||||||||||
971 | Value *Ptr = getLoadStorePointerOperand(&I); | ||||||||||
972 | if (!Ptr) | ||||||||||
973 | continue; | ||||||||||
974 | Type *ElementTy = getLoadStoreType(&I); | ||||||||||
975 | |||||||||||
976 | // We don't check wrapping here because we don't know yet if Ptr will be | ||||||||||
977 | // part of a full group or a group with gaps. Checking wrapping for all | ||||||||||
978 | // pointers (even those that end up in groups with no gaps) will be overly | ||||||||||
979 | // conservative. For full groups, wrapping should be ok since if we would | ||||||||||
980 | // wrap around the address space we would do a memory access at nullptr | ||||||||||
981 | // even without the transformation. The wrapping checks are therefore | ||||||||||
982 | // deferred until after we've formed the interleaved groups. | ||||||||||
983 | int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, | ||||||||||
984 | /*Assume=*/true, /*ShouldCheckWrap=*/false); | ||||||||||
985 | |||||||||||
986 | const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); | ||||||||||
987 | uint64_t Size = DL.getTypeAllocSize(ElementTy); | ||||||||||
988 | AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, | ||||||||||
989 | getLoadStoreAlignment(&I)); | ||||||||||
990 | } | ||||||||||
991 | } | ||||||||||
992 | |||||||||||
993 | // Analyze interleaved accesses and collect them into interleaved load and | ||||||||||
994 | // store groups. | ||||||||||
995 | // | ||||||||||
996 | // When generating code for an interleaved load group, we effectively hoist all | ||||||||||
997 | // loads in the group to the location of the first load in program order. When | ||||||||||
998 | // generating code for an interleaved store group, we sink all stores to the | ||||||||||
999 | // location of the last store. This code motion can change the order of load | ||||||||||
1000 | // and store instructions and may break dependences. | ||||||||||
1001 | // | ||||||||||
1002 | // The code generation strategy mentioned above ensures that we won't violate | ||||||||||
1003 | // any write-after-read (WAR) dependences. | ||||||||||
1004 | // | ||||||||||
1005 | // E.g., for the WAR dependence: a = A[i]; // (1) | ||||||||||
1006 | // A[i] = b; // (2) | ||||||||||
1007 | // | ||||||||||
1008 | // The store group of (2) is always inserted at or below (2), and the load | ||||||||||
1009 | // group of (1) is always inserted at or above (1). Thus, the instructions will | ||||||||||
1010 | // never be reordered. All other dependences are checked to ensure the | ||||||||||
1011 | // correctness of the instruction reordering. | ||||||||||
1012 | // | ||||||||||
1013 | // The algorithm visits all memory accesses in the loop in bottom-up program | ||||||||||
1014 | // order. Program order is established by traversing the blocks in the loop in | ||||||||||
1015 | // reverse postorder when collecting the accesses. | ||||||||||
1016 | // | ||||||||||
1017 | // We visit the memory accesses in bottom-up order because it can simplify the | ||||||||||
1018 | // construction of store groups in the presence of write-after-write (WAW) | ||||||||||
1019 | // dependences. | ||||||||||
1020 | // | ||||||||||
1021 | // E.g., for the WAW dependence: A[i] = a; // (1) | ||||||||||
1022 | // A[i] = b; // (2) | ||||||||||
1023 | // A[i + 1] = c; // (3) | ||||||||||
1024 | // | ||||||||||
1025 | // We will first create a store group with (3) and (2). (1) can't be added to | ||||||||||
1026 | // this group because it and (2) are dependent. However, (1) can be grouped | ||||||||||
1027 | // with other accesses that may precede it in program order. Note that a | ||||||||||
1028 | // bottom-up order does not imply that WAW dependences should not be checked. | ||||||||||
1029 | void InterleavedAccessInfo::analyzeInterleaving( | ||||||||||
1030 | bool EnablePredicatedInterleavedMemAccesses) { | ||||||||||
1031 | LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); | ||||||||||
1032 | const ValueToValueMap &Strides = LAI->getSymbolicStrides(); | ||||||||||
1033 | |||||||||||
1034 | // Holds all accesses with a constant stride. | ||||||||||
1035 | MapVector<Instruction *, StrideDescriptor> AccessStrideInfo; | ||||||||||
1036 | collectConstStrideAccesses(AccessStrideInfo, Strides); | ||||||||||
1037 | |||||||||||
1038 | if (AccessStrideInfo.empty()) | ||||||||||
1039 | return; | ||||||||||
1040 | |||||||||||
1041 | // Collect the dependences in the loop. | ||||||||||
1042 | collectDependences(); | ||||||||||
1043 | |||||||||||
1044 | // Holds all interleaved store groups temporarily. | ||||||||||
1045 | SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups; | ||||||||||
1046 | // Holds all interleaved load groups temporarily. | ||||||||||
1047 | SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups; | ||||||||||
1048 | |||||||||||
1049 | // Search in bottom-up program order for pairs of accesses (A and B) that can | ||||||||||
1050 | // form interleaved load or store groups. In the algorithm below, access A | ||||||||||
1051 | // precedes access B in program order. We initialize a group for B in the | ||||||||||
1052 | // outer loop of the algorithm, and then in the inner loop, we attempt to | ||||||||||
1053 | // insert each A into B's group if: | ||||||||||
1054 | // | ||||||||||
1055 | // 1. A and B have the same stride, | ||||||||||
1056 | // 2. A and B have the same memory object size, and | ||||||||||
1057 | // 3. A belongs in B's group according to its distance from B. | ||||||||||
1058 | // | ||||||||||
1059 | // Special care is taken to ensure group formation will not break any | ||||||||||
1060 | // dependences. | ||||||||||
1061 | for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend(); | ||||||||||
1062 | BI != E; ++BI) { | ||||||||||
1063 | Instruction *B = BI->first; | ||||||||||
1064 | StrideDescriptor DesB = BI->second; | ||||||||||
1065 | |||||||||||
1066 | // Initialize a group for B if it has an allowable stride. Even if we don't | ||||||||||
1067 | // create a group for B, we continue with the bottom-up algorithm to ensure | ||||||||||
1068 | // we don't break any of B's dependences. | ||||||||||
1069 | InterleaveGroup<Instruction> *Group = nullptr; | ||||||||||
1070 | if (isStrided(DesB.Stride) && | ||||||||||
1071 | (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) { | ||||||||||
1072 | Group = getInterleaveGroup(B); | ||||||||||
1073 | if (!Group) { | ||||||||||
1074 | LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B | ||||||||||
1075 | << '\n'); | ||||||||||
1076 | Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment); | ||||||||||
1077 | } | ||||||||||
1078 | if (B->mayWriteToMemory()) | ||||||||||
1079 | StoreGroups.insert(Group); | ||||||||||
1080 | else | ||||||||||
1081 | LoadGroups.insert(Group); | ||||||||||
1082 | } | ||||||||||
1083 | |||||||||||
1084 | for (auto AI = std::next(BI); AI != E; ++AI) { | ||||||||||
1085 | Instruction *A = AI->first; | ||||||||||
1086 | StrideDescriptor DesA = AI->second; | ||||||||||
1087 | |||||||||||
1088 | // Our code motion strategy implies that we can't have dependences | ||||||||||
1089 | // between accesses in an interleaved group and other accesses located | ||||||||||
1090 | // between the first and last member of the group. Note that this also | ||||||||||
1091 | // means that a group can't have more than one member at a given offset. | ||||||||||
1092 | // The accesses in a group can have dependences with other accesses, but | ||||||||||
1093 | // we must ensure we don't extend the boundaries of the group such that | ||||||||||
1094 | // we encompass those dependent accesses. | ||||||||||
1095 | // | ||||||||||
1096 | // For example, assume we have the sequence of accesses shown below in a | ||||||||||
1097 | // stride-2 loop: | ||||||||||
1098 | // | ||||||||||
1099 | // (1, 2) is a group | A[i] = a; // (1) | ||||||||||
1100 | // | A[i-1] = b; // (2) | | ||||||||||
1101 | // A[i-3] = c; // (3) | ||||||||||
1102 | // A[i] = d; // (4) | (2, 4) is not a group | ||||||||||
1103 | // | ||||||||||
1104 | // Because accesses (2) and (3) are dependent, we can group (2) with (1) | ||||||||||
1105 | // but not with (4). If we did, the dependent access (3) would be within | ||||||||||
1106 | // the boundaries of the (2, 4) group. | ||||||||||
1107 | if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) { | ||||||||||
1108 | // If a dependence exists and A is already in a group, we know that A | ||||||||||
1109 | // must be a store since A precedes B and WAR dependences are allowed. | ||||||||||
1110 | // Thus, A would be sunk below B. We release A's group to prevent this | ||||||||||
1111 | // illegal code motion. A will then be free to form another group with | ||||||||||
1112 | // instructions that precede it. | ||||||||||
1113 | if (isInterleaved(A)) { | ||||||||||
1114 | InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A); | ||||||||||
1115 | |||||||||||
1116 | LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "do { } while (false) | ||||||||||
1117 | "dependence between " << *A << " and "<< *B << '\n')do { } while (false); | ||||||||||
1118 | |||||||||||
1119 | StoreGroups.remove(StoreGroup); | ||||||||||
1120 | releaseGroup(StoreGroup); | ||||||||||
1121 | } | ||||||||||
1122 | |||||||||||
1123 | // If a dependence exists and A is not already in a group (or it was | ||||||||||
1124 | // and we just released it), B might be hoisted above A (if B is a | ||||||||||
1125 | // load) or another store might be sunk below A (if B is a store). In | ||||||||||
1126 | // either case, we can't add additional instructions to B's group. B | ||||||||||
1127 | // will only form a group with instructions that it precedes. | ||||||||||
1128 | break; | ||||||||||
1129 | } | ||||||||||
1130 | |||||||||||
1131 | // At this point, we've checked for illegal code motion. If either A or B | ||||||||||
1132 | // isn't strided, there's nothing left to do. | ||||||||||
1133 | if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride)) | ||||||||||
1134 | continue; | ||||||||||
1135 | |||||||||||
1136 | // Ignore A if it's already in a group or isn't the same kind of memory | ||||||||||
1137 | // operation as B. | ||||||||||
1138 | // Note that mayReadFromMemory() isn't mutually exclusive with | ||||||||||
1139 | // mayWriteToMemory() in the case of atomic loads. We shouldn't see those | ||||||||||
1140 | // here; canVectorizeMemory() should have returned false - except when | ||||||||||
1141 | // we asked for optimization remarks. | ||||||||||
1142 | if (isInterleaved(A) || | ||||||||||
1143 | (A->mayReadFromMemory() != B->mayReadFromMemory()) || | ||||||||||
1144 | (A->mayWriteToMemory() != B->mayWriteToMemory())) | ||||||||||
1145 | continue; | ||||||||||
1146 | |||||||||||
1147 | // Check rules 1 and 2. Ignore A if its stride or size is different from | ||||||||||
1148 | // that of B. | ||||||||||
1149 | if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size) | ||||||||||
1150 | continue; | ||||||||||
1151 | |||||||||||
1152 | // Ignore A if the memory objects of A and B don't belong to the same | ||||||||||
1153 | // address space. | ||||||||||
1154 | if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B)) | ||||||||||
1155 | continue; | ||||||||||
1156 | |||||||||||
1157 | // Calculate the distance from A to B. | ||||||||||
1158 | const SCEVConstant *DistToB = dyn_cast<SCEVConstant>( | ||||||||||
1159 | PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev)); | ||||||||||
1160 | if (!DistToB) | ||||||||||
1161 | continue; | ||||||||||
1162 | int64_t DistanceToB = DistToB->getAPInt().getSExtValue(); | ||||||||||
1163 | |||||||||||
1164 | // Check rule 3. Ignore A if its distance to B is not a multiple of the | ||||||||||
1165 | // size. | ||||||||||
1166 | if (DistanceToB % static_cast<int64_t>(DesB.Size)) | ||||||||||
1167 | continue; | ||||||||||
1168 | |||||||||||
1169 | // All members of a predicated interleave-group must have the same predicate, | ||||||||||
1170 | // and currently must reside in the same BB. | ||||||||||
1171 | BasicBlock *BlockA = A->getParent(); | ||||||||||
1172 | BasicBlock *BlockB = B->getParent(); | ||||||||||
1173 | if ((isPredicated(BlockA) || isPredicated(BlockB)) && | ||||||||||
1174 | (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB)) | ||||||||||
1175 | continue; | ||||||||||
1176 | |||||||||||
1177 | // The index of A is the index of B plus A's distance to B in multiples | ||||||||||
1178 | // of the size. | ||||||||||
1179 | int IndexA = | ||||||||||
1180 | Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size); | ||||||||||
1181 | |||||||||||
1182 | // Try to insert A into B's group. | ||||||||||
1183 | if (Group->insertMember(A, IndexA, DesA.Alignment)) { | ||||||||||
1184 | LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'do { } while (false) | ||||||||||
1185 | << " into the interleave group with" << *Bdo { } while (false) | ||||||||||
1186 | << '\n')do { } while (false); | ||||||||||
1187 | InterleaveGroupMap[A] = Group; | ||||||||||
1188 | |||||||||||
1189 | // Set the first load in program order as the insert position. | ||||||||||
1190 | if (A->mayReadFromMemory()) | ||||||||||
1191 | Group->setInsertPos(A); | ||||||||||
1192 | } | ||||||||||
1193 | } // Iteration over A accesses. | ||||||||||
1194 | } // Iteration over B accesses. | ||||||||||
1195 | |||||||||||
1196 | // Remove interleaved store groups with gaps. | ||||||||||
1197 | for (auto *Group : StoreGroups) | ||||||||||
1198 | if (Group->getNumMembers() != Group->getFactor()) { | ||||||||||
1199 | LLVM_DEBUG(do { } while (false) | ||||||||||
1200 | dbgs() << "LV: Invalidate candidate interleaved store group due "do { } while (false) | ||||||||||
1201 | "to gaps.\n")do { } while (false); | ||||||||||
1202 | releaseGroup(Group); | ||||||||||
1203 | } | ||||||||||
1204 | // Remove interleaved groups with gaps (currently only loads) whose memory | ||||||||||
1205 | // accesses may wrap around. We have to revisit the getPtrStride analysis, | ||||||||||
1206 | // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does | ||||||||||
1207 | // not check wrapping (see documentation there). | ||||||||||
1208 | // FORNOW we use Assume=false; | ||||||||||
1209 | // TODO: Change to Assume=true but making sure we don't exceed the threshold | ||||||||||
1210 | // of runtime SCEV assumptions checks (thereby potentially failing to | ||||||||||
1211 | // vectorize altogether). | ||||||||||
1212 | // Additional optional optimizations: | ||||||||||
1213 | // TODO: If we are peeling the loop and we know that the first pointer doesn't | ||||||||||
1214 | // wrap then we can deduce that all pointers in the group don't wrap. | ||||||||||
1215 | // This means that we can forcefully peel the loop in order to only have to | ||||||||||
1216 | // check the first pointer for no-wrap. When we change to use Assume=true | ||||||||||
1217 | // we'll only need at most one runtime check per interleaved group. | ||||||||||
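As a sketch of what a gapped load group looks like at the source level (illustrative only, not taken from this file):

    // Factor-2 load group with a gap: only slot 0 is read. The widened load
    // would also touch A[2*I + 1], so it must not wrap past the allocation,
    // or the last iteration has to run in a scalar epilogue.
    int SumEven(const int *A, int N) {
      int Sum = 0;
      for (int I = 0; I < N; ++I)
        Sum += A[2 * I];
      return Sum;
    }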
1218 | for (auto *Group : LoadGroups) { | ||||||||||
1219 | // Case 1: A full group. We can skip the checks; for full groups, if the wide | ||||||||||
1220 | // load would wrap around the address space we would do a memory access at | ||||||||||
1221 | // nullptr even without the transformation. | ||||||||||
1222 | if (Group->getNumMembers() == Group->getFactor()) | ||||||||||
1223 | continue; | ||||||||||
1224 | |||||||||||
1225 | // Case 2: If first and last members of the group don't wrap this implies | ||||||||||
1226 | // that all the pointers in the group don't wrap. | ||||||||||
1227 | // So we check only group member 0 (which is always guaranteed to exist), | ||||||||||
1228 | // and group member Factor - 1; if the latter doesn't exist we rely on | ||||||||||
1229 | // peeling (if it is a non-reversed access -- see Case 3). | ||||||||||
1230 | Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0)); | ||||||||||
1231 | if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, | ||||||||||
1232 | /*ShouldCheckWrap=*/true)) { | ||||||||||
1233 | LLVM_DEBUG(do { } while (false) | ||||||||||
1234 | dbgs() << "LV: Invalidate candidate interleaved group due to "do { } while (false) | ||||||||||
1235 | "first group member potentially pointer-wrapping.\n")do { } while (false); | ||||||||||
1236 | releaseGroup(Group); | ||||||||||
1237 | continue; | ||||||||||
1238 | } | ||||||||||
1239 | Instruction *LastMember = Group->getMember(Group->getFactor() - 1); | ||||||||||
1240 | if (LastMember) { | ||||||||||
1241 | Value *LastMemberPtr = getLoadStorePointerOperand(LastMember); | ||||||||||
1242 | if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, | ||||||||||
1243 | /*ShouldCheckWrap=*/true)) { | ||||||||||
1244 | LLVM_DEBUG(do { } while (false) | ||||||||||
1245 | dbgs() << "LV: Invalidate candidate interleaved group due to "do { } while (false) | ||||||||||
1246 | "last group member potentially pointer-wrapping.\n")do { } while (false); | ||||||||||
1247 | releaseGroup(Group); | ||||||||||
1248 | } | ||||||||||
1249 | } else { | ||||||||||
1250 | // Case 3: A non-reversed interleaved load group with gaps: We need | ||||||||||
1251 | // to execute at least one scalar epilogue iteration. This will ensure | ||||||||||
1252 | // we don't speculatively access memory out-of-bounds. We only need | ||||||||||
1253 | // to look for a member at index factor - 1, since every group must have | ||||||||||
1254 | // a member at index zero. | ||||||||||
1255 | if (Group->isReverse()) { | ||||||||||
1256 | LLVM_DEBUG(do { } while (false) | ||||||||||
1257 | dbgs() << "LV: Invalidate candidate interleaved group due to "do { } while (false) | ||||||||||
1258 | "a reverse access with gaps.\n")do { } while (false); | ||||||||||
1259 | releaseGroup(Group); | ||||||||||
1260 | continue; | ||||||||||
1261 | } | ||||||||||
1262 | LLVM_DEBUG(do { } while (false) | ||||||||||
1263 | dbgs() << "LV: Interleaved group requires epilogue iteration.\n")do { } while (false); | ||||||||||
1264 | RequiresScalarEpilogue = true; | ||||||||||
1265 | } | ||||||||||
1266 | } | ||||||||||
1267 | } | ||||||||||
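A minimal sketch of how a client could walk the groups this analysis leaves behind; IAI, I, and process are illustrative names, not part of this file.

    // Look up the group (if any) that instruction I was placed in and visit
    // every occupied slot; gaps show up as null members.
    if (const InterleaveGroup<Instruction> *G = IAI.getInterleaveGroup(I))
      for (unsigned Idx = 0; Idx < G->getFactor(); ++Idx)
        if (Instruction *Member = G->getMember(Idx))
          process(Member, Idx); // hypothetical consumer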
1268 | |||||||||||
1269 | void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() { | ||||||||||
1270 | // If no group had triggered the requirement to create an epilogue loop, | ||||||||||
1271 | // there is nothing to do. | ||||||||||
1272 | if (!requiresScalarEpilogue()) | ||||||||||
1273 | return; | ||||||||||
1274 | |||||||||||
1275 | bool ReleasedGroup = false; | ||||||||||
1276 | // Release groups requiring scalar epilogues. Note that this also removes them | ||||||||||
1277 | // from InterleaveGroups. | ||||||||||
1278 | for (auto *Group : make_early_inc_range(InterleaveGroups)) { | ||||||||||
1279 | if (!Group->requiresScalarEpilogue()) | ||||||||||
1280 | continue; | ||||||||||
1281 | LLVM_DEBUG(do { } while (false) | ||||||||||
1282 | dbgs()do { } while (false) | ||||||||||
1283 | << "LV: Invalidate candidate interleaved group due to gaps that "do { } while (false) | ||||||||||
1284 | "require a scalar epilogue (not allowed under optsize) and cannot "do { } while (false) | ||||||||||
1285 | "be masked (not enabled). \n")do { } while (false); | ||||||||||
1286 | releaseGroup(Group); | ||||||||||
1287 | ReleasedGroup = true; | ||||||||||
1288 | } | ||||||||||
1289 | assert(ReleasedGroup && "At least one group must be invalidated, as a "((void)0) | ||||||||||
1290 | "scalar epilogue was required")((void)0); | ||||||||||
1291 | (void)ReleasedGroup; | ||||||||||
1292 | RequiresScalarEpilogue = false; | ||||||||||
1293 | } | ||||||||||
1294 | |||||||||||
1295 | template <typename InstT> | ||||||||||
1296 | void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const { | ||||||||||
1297 | llvm_unreachable("addMetadata can only be used for Instruction")__builtin_unreachable(); | ||||||||||
1298 | } | ||||||||||
1299 | |||||||||||
1300 | namespace llvm { | ||||||||||
1301 | template <> | ||||||||||
1302 | void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const { | ||||||||||
1303 | SmallVector<Value *, 4> VL; | ||||||||||
1304 | std::transform(Members.begin(), Members.end(), std::back_inserter(VL), | ||||||||||
1305 | [](std::pair<int, Instruction *> p) { return p.second; }); | ||||||||||
1306 | propagateMetadata(NewInst, VL); | ||||||||||
1307 | } | ||||||||||
1308 | } | ||||||||||
1309 | |||||||||||
1310 | std::string VFABI::mangleTLIVectorName(StringRef VectorName, | ||||||||||
1311 | StringRef ScalarName, unsigned numArgs, | ||||||||||
1312 | ElementCount VF) { | ||||||||||
1313 | SmallString<256> Buffer; | ||||||||||
1314 | llvm::raw_svector_ostream Out(Buffer); | ||||||||||
1315 | Out << "_ZGV" << VFABI::_LLVM_ << "N"; | ||||||||||
1316 | if (VF.isScalable()) | ||||||||||
1317 | Out << 'x'; | ||||||||||
1318 | else | ||||||||||
1319 | Out << VF.getFixedValue(); | ||||||||||
1320 | for (unsigned I = 0; I < numArgs; ++I) | ||||||||||
1321 | Out << "v"; | ||||||||||
1322 | Out << "_" << ScalarName << "(" << VectorName << ")"; | ||||||||||
1323 | return std::string(Out.str()); | ||||||||||
1324 | } | ||||||||||
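A worked example of the string this produces, using placeholder TLI names ("sinf"/"vsinf" are only for illustration):

    // Fixed VF of 4 and one argument mangled as "v":
    //   "_ZGV" + "_LLVM_" + "N" + "4" + "v" + "_sinf(vsinf)"
    //   == "_ZGV_LLVM_N4v_sinf(vsinf)"
    std::string S = VFABI::mangleTLIVectorName("vsinf", "sinf", /*numArgs=*/1,
                                               ElementCount::getFixed(4));
    // With a scalable VF the <vlen> digit becomes 'x':
    //   "_ZGV_LLVM_Nxv_sinf(vsinf)".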
1325 | |||||||||||
1326 | void VFABI::getVectorVariantNames( | ||||||||||
1327 | const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) { | ||||||||||
1328 | const StringRef S = | ||||||||||
1329 | CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName) | ||||||||||
1330 | .getValueAsString(); | ||||||||||
1331 | if (S.empty()) | ||||||||||
1332 | return; | ||||||||||
1333 | |||||||||||
1334 | SmallVector<StringRef, 8> ListAttr; | ||||||||||
1335 | S.split(ListAttr, ","); | ||||||||||
1336 | |||||||||||
1337 | for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { | ||||||||||
1338 | #ifndef NDEBUG1 | ||||||||||
1339 | LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n")do { } while (false); | ||||||||||
1340 | Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule())); | ||||||||||
1341 | assert(Info.hasValue() && "Invalid name for a VFABI variant.")((void)0); | ||||||||||
1342 | assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&((void)0) | ||||||||||
1343 | "Vector function is missing.")((void)0); | ||||||||||
1344 | #endif | ||||||||||
1345 | VariantMappings.push_back(std::string(S)); | ||||||||||
1346 | } | ||||||||||
1347 | } | ||||||||||
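A usage sketch, assuming CI is a CallInst whose "vector-function-abi-variant" attribute carries two (illustrative) mangled names separated by a comma:

    //   "_ZGV_LLVM_N2v_foo(foo_vec2),_ZGV_LLVM_N4v_foo(foo_vec4)"
    SmallVector<std::string, 8> Variants;
    VFABI::getVectorVariantNames(CI, Variants);
    // Variants now holds the two mangled strings, split on ','; if the
    // attribute is absent, the vector is simply left empty.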
1348 | |||||||||||
1349 | bool VFShape::hasValidParameterList() const { | ||||||||||
1350 | for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams; | ||||||||||
1351 | ++Pos) { | ||||||||||
1352 | assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.")((void)0); | ||||||||||
1353 | |||||||||||
1354 | switch (Parameters[Pos].ParamKind) { | ||||||||||
1355 | default: // Nothing to check. | ||||||||||
1356 | break; | ||||||||||
1357 | case VFParamKind::OMP_Linear: | ||||||||||
1358 | case VFParamKind::OMP_LinearRef: | ||||||||||
1359 | case VFParamKind::OMP_LinearVal: | ||||||||||
1360 | case VFParamKind::OMP_LinearUVal: | ||||||||||
1361 | // Compile time linear steps must be non-zero. | ||||||||||
1362 | if (Parameters[Pos].LinearStepOrPos == 0) | ||||||||||
1363 | return false; | ||||||||||
1364 | break; | ||||||||||
1365 | case VFParamKind::OMP_LinearPos: | ||||||||||
1366 | case VFParamKind::OMP_LinearRefPos: | ||||||||||
1367 | case VFParamKind::OMP_LinearValPos: | ||||||||||
1368 | case VFParamKind::OMP_LinearUValPos: | ||||||||||
1369 | // The runtime linear step must refer to some other | ||||||||||
1370 | // parameter in the signature. | ||||||||||
1371 | if (Parameters[Pos].LinearStepOrPos >= int(NumParams)) | ||||||||||
1372 | return false; | ||||||||||
1373 | // The linear step parameter must be marked as uniform. | ||||||||||
1374 | if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind != | ||||||||||
1375 | VFParamKind::OMP_Uniform) | ||||||||||
1376 | return false; | ||||||||||
1377 | // The linear step parameter can't point at itself. | ||||||||||
1378 | if (Parameters[Pos].LinearStepOrPos == int(Pos)) | ||||||||||
1379 | return false; | ||||||||||
1380 | break; | ||||||||||
1381 | case VFParamKind::GlobalPredicate: | ||||||||||
1382 | // The global predicate must be unique. It can be placed anywhere in the | ||||||||||
1383 | // signature. | ||||||||||
1384 | for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos) | ||||||||||
1385 | if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate) | ||||||||||
1386 | return false; | ||||||||||
1387 | break; | ||||||||||
1388 | } | ||||||||||
1389 | } | ||||||||||
1390 | return true; | ||||||||||
1391 | } |
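A minimal sketch of a parameter list these rules accept, built by hand for illustration: parameter 0 is linear with its runtime step held in parameter 1, so parameter 1 must be OMP_Uniform and must not be position 0 itself.

    VFShape Shape;
    Shape.VF = ElementCount::getFixed(4);
    Shape.Parameters = {
        VFParameter({0, VFParamKind::OMP_LinearPos, /*LinearStepOrPos=*/1}),
        VFParameter({1, VFParamKind::OMP_Uniform})};
    bool Valid = Shape.hasValidParameterList(); // true for this list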
1 | // -*- C++ -*- |
2 | //===----------------------------------------------------------------------===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef _LIBCPP___ITERATOR_REVERSE_ITERATOR_H |
11 | #define _LIBCPP___ITERATOR_REVERSE_ITERATOR_H |
12 | |
13 | #include <__config> |
14 | #include <__iterator/iterator.h> |
15 | #include <__iterator/iterator_traits.h> |
16 | #include <__memory/addressof.h> |
17 | #include <type_traits> |
18 | |
19 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
20 | #pragma GCC system_header |
21 | #endif |
22 | |
23 | _LIBCPP_PUSH_MACROSpush_macro("min") push_macro("max") |
24 | #include <__undef_macros> |
25 | |
26 | _LIBCPP_BEGIN_NAMESPACE_STDnamespace std { inline namespace __1 { |
27 | |
28 | template <class _Tp, class = void> |
29 | struct __is_stashing_iterator : false_type {}; |
30 | |
31 | template <class _Tp> |
32 | struct __is_stashing_iterator<_Tp, typename __void_t<typename _Tp::__stashing_iterator_tag>::type> |
33 | : true_type {}; |
34 | |
35 | _LIBCPP_SUPPRESS_DEPRECATED_PUSHGCC diagnostic push GCC diagnostic ignored "-Wdeprecated" GCC diagnostic ignored "-Wdeprecated-declarations" |
36 | template <class _Iter> |
37 | class _LIBCPP_TEMPLATE_VIS__attribute__ ((__type_visibility__("default"))) reverse_iterator |
38 | #if _LIBCPP_STD_VER14 <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) |
39 | : public iterator<typename iterator_traits<_Iter>::iterator_category, |
40 | typename iterator_traits<_Iter>::value_type, |
41 | typename iterator_traits<_Iter>::difference_type, |
42 | typename iterator_traits<_Iter>::pointer, |
43 | typename iterator_traits<_Iter>::reference> |
44 | #endif |
45 | { |
46 | _LIBCPP_SUPPRESS_DEPRECATED_POPGCC diagnostic pop |
47 | private: |
48 | #ifndef _LIBCPP_ABI_NO_ITERATOR_BASES |
49 | _Iter __t; // no longer used as of LWG #2360, not removed due to ABI break |
50 | #endif |
51 | |
52 | static_assert(!__is_stashing_iterator<_Iter>::value, |
53 | "The specified iterator type cannot be used with reverse_iterator; " |
54 | "Using stashing iterators with reverse_iterator causes undefined behavior"); |
55 | |
56 | protected: |
57 | _Iter current; |
58 | public: |
59 | typedef _Iter iterator_type; |
60 | typedef typename iterator_traits<_Iter>::difference_type difference_type; |
61 | typedef typename iterator_traits<_Iter>::reference reference; |
62 | typedef typename iterator_traits<_Iter>::pointer pointer; |
63 | typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, |
64 | random_access_iterator_tag, |
65 | typename iterator_traits<_Iter>::iterator_category> iterator_category; |
66 | typedef typename iterator_traits<_Iter>::value_type value_type; |
67 | |
68 | #if _LIBCPP_STD_VER14 > 17 |
69 | typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, |
70 | random_access_iterator_tag, |
71 | bidirectional_iterator_tag> iterator_concept; |
72 | #endif |
73 | |
74 | #ifndef _LIBCPP_ABI_NO_ITERATOR_BASES |
75 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
76 | reverse_iterator() : __t(), current() {} |
77 | |
78 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
79 | explicit reverse_iterator(_Iter __x) : __t(__x), current(__x) {} |
80 | |
81 | template <class _Up, class = _EnableIf< |
82 | !is_same<_Up, _Iter>::value && is_convertible<_Up const&, _Iter>::value |
83 | > > |
84 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
85 | reverse_iterator(const reverse_iterator<_Up>& __u) |
86 | : __t(__u.base()), current(__u.base()) |
87 | { } |
88 | |
89 | template <class _Up, class = _EnableIf< |
90 | !is_same<_Up, _Iter>::value && |
91 | is_convertible<_Up const&, _Iter>::value && |
92 | is_assignable<_Up const&, _Iter>::value |
93 | > > |
94 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
95 | reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { |
96 | __t = current = __u.base(); |
97 | return *this; |
98 | } |
99 | #else |
100 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
101 | reverse_iterator() : current() {} |
102 | |
103 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
104 | explicit reverse_iterator(_Iter __x) : current(__x) {} |
105 | |
106 | template <class _Up, class = _EnableIf< |
107 | !is_same<_Up, _Iter>::value && is_convertible<_Up const&, _Iter>::value |
108 | > > |
109 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
110 | reverse_iterator(const reverse_iterator<_Up>& __u) |
111 | : current(__u.base()) |
112 | { } |
113 | |
114 | template <class _Up, class = _EnableIf< |
115 | !is_same<_Up, _Iter>::value && |
116 | is_convertible<_Up const&, _Iter>::value && |
117 | is_assignable<_Up const&, _Iter>::value |
118 | > > |
119 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
120 | reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { |
121 | current = __u.base(); |
122 | return *this; |
123 | } |
124 | #endif |
125 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
126 | _Iter base() const {return current;} |
127 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
128 | reference operator*() const {_Iter __tmp = current; return *--__tmp;} |
129 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
130 | pointer operator->() const {return _VSTDstd::__1::addressof(operator*());} |
131 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
132 | reverse_iterator& operator++() {--current; return *this;} |
133 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
134 | reverse_iterator operator++(int) {reverse_iterator __tmp(*this); --current; return __tmp;} |
135 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
136 | reverse_iterator& operator--() {++current; return *this;} |
137 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
138 | reverse_iterator operator--(int) {reverse_iterator __tmp(*this); ++current; return __tmp;} |
139 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
140 | reverse_iterator operator+ (difference_type __n) const {return reverse_iterator(current - __n);} |
141 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
142 | reverse_iterator& operator+=(difference_type __n) {current -= __n; return *this;} |
143 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
144 | reverse_iterator operator- (difference_type __n) const {return reverse_iterator(current + __n);} |
145 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
146 | reverse_iterator& operator-=(difference_type __n) {current += __n; return *this;} |
147 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
148 | reference operator[](difference_type __n) const {return *(*this + __n);} |
149 | }; |
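A small usage sketch of the adaptor defined above: the stored `current` iterator sits one position past the element the reverse_iterator observably refers to.

    #include <cassert>
    #include <vector>

    void reverse_iterator_demo() {
      std::vector<int> v{1, 2, 3};
      std::reverse_iterator<std::vector<int>::iterator> r(v.end());
      assert(*r == 3);             // operator* dereferences *--tmp
      assert(r.base() == v.end()); // base() is one past the referenced element
      ++r;                         // decrements current; now refers to 2
      assert(*(r + 1) == 1);       // operator+ subtracts from current
    }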
150 | |
151 | template <class _Iter1, class _Iter2> |
152 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
153 | bool |
154 | operator==(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
155 | { |
156 | return __x.base() == __y.base(); |
157 | } |
158 | |
159 | template <class _Iter1, class _Iter2> |
160 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
161 | bool |
162 | operator<(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
163 | { |
164 | return __x.base() > __y.base(); |
165 | } |
166 | |
167 | template <class _Iter1, class _Iter2> |
168 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
169 | bool |
170 | operator!=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
171 | { |
172 | return __x.base() != __y.base(); |
173 | } |
174 | |
175 | template <class _Iter1, class _Iter2> |
176 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
177 | bool |
178 | operator>(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
179 | { |
180 | return __x.base() < __y.base(); |
181 | } |
182 | |
183 | template <class _Iter1, class _Iter2> |
184 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
185 | bool |
186 | operator>=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
187 | { |
188 | return __x.base() <= __y.base(); |
189 | } |
190 | |
191 | template <class _Iter1, class _Iter2> |
192 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
193 | bool |
194 | operator<=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
195 | { |
196 | return __x.base() >= __y.base(); |
197 | } |
198 | |
199 | #ifndef _LIBCPP_CXX03_LANG |
200 | template <class _Iter1, class _Iter2> |
201 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
202 | auto |
203 | operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
204 | -> decltype(__y.base() - __x.base()) |
205 | { |
206 | return __y.base() - __x.base(); |
207 | } |
208 | #else |
209 | template <class _Iter1, class _Iter2> |
210 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) |
211 | typename reverse_iterator<_Iter1>::difference_type |
212 | operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
213 | { |
214 | return __y.base() - __x.base(); |
215 | } |
216 | #endif |
217 | |
218 | template <class _Iter> |
219 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
220 | reverse_iterator<_Iter> |
221 | operator+(typename reverse_iterator<_Iter>::difference_type __n, const reverse_iterator<_Iter>& __x) |
222 | { |
223 | return reverse_iterator<_Iter>(__x.base() - __n); |
224 | } |
225 | |
226 | #if _LIBCPP_STD_VER14 > 11 |
227 | template <class _Iter> |
228 | inline _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_AFTER_CXX14 |
229 | reverse_iterator<_Iter> make_reverse_iterator(_Iter __i) |
230 | { |
231 | return reverse_iterator<_Iter>(__i); |
232 | } |
233 | #endif |
234 | |
235 | _LIBCPP_END_NAMESPACE_STD} } |
236 | |
237 | _LIBCPP_POP_MACROSpop_macro("min") pop_macro("max") |
238 | |
239 | #endif // _LIBCPP___ITERATOR_REVERSE_ITERATOR_H |
1 | // -*- C++ -*- |
2 | //===----------------------------------------------------------------------===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef _LIBCPP___ITERATOR_WRAP_ITER_H |
11 | #define _LIBCPP___ITERATOR_WRAP_ITER_H |
12 | |
13 | #include <__config> |
14 | #include <__debug> |
15 | #include <__iterator/iterator_traits.h> |
16 | #include <__memory/pointer_traits.h> // __to_address |
17 | #include <type_traits> |
18 | |
19 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
20 | #pragma GCC system_header |
21 | #endif |
22 | |
23 | _LIBCPP_PUSH_MACROSpush_macro("min") push_macro("max") |
24 | #include <__undef_macros> |
25 | |
26 | _LIBCPP_BEGIN_NAMESPACE_STDnamespace std { inline namespace __1 { |
27 | |
28 | template <class _Iter> |
29 | class __wrap_iter |
30 | { |
31 | public: |
32 | typedef _Iter iterator_type; |
33 | typedef typename iterator_traits<iterator_type>::value_type value_type; |
34 | typedef typename iterator_traits<iterator_type>::difference_type difference_type; |
35 | typedef typename iterator_traits<iterator_type>::pointer pointer; |
36 | typedef typename iterator_traits<iterator_type>::reference reference; |
37 | typedef typename iterator_traits<iterator_type>::iterator_category iterator_category; |
38 | #if _LIBCPP_STD_VER14 > 17 |
39 | typedef contiguous_iterator_tag iterator_concept; |
40 | #endif |
41 | |
42 | private: |
43 | iterator_type __i; |
44 | public: |
45 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter() _NOEXCEPTnoexcept |
46 | #if _LIBCPP_STD_VER14 > 11 |
47 | : __i{} |
48 | #endif |
49 | { |
50 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
51 | __get_db()->__insert_i(this); |
52 | #endif |
53 | } |
54 | template <class _Up> _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
55 | __wrap_iter(const __wrap_iter<_Up>& __u, |
56 | typename enable_if<is_convertible<_Up, iterator_type>::value>::type* = nullptr) _NOEXCEPTnoexcept |
57 | : __i(__u.base()) |
58 | { |
59 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
60 | __get_db()->__iterator_copy(this, &__u); |
61 | #endif |
62 | } |
63 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
64 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
65 | __wrap_iter(const __wrap_iter& __x) |
66 | : __i(__x.base()) |
67 | { |
68 | __get_db()->__iterator_copy(this, &__x); |
69 | } |
70 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
71 | __wrap_iter& operator=(const __wrap_iter& __x) |
72 | { |
73 | if (this != &__x) |
74 | { |
75 | __get_db()->__iterator_copy(this, &__x); |
76 | __i = __x.__i; |
77 | } |
78 | return *this; |
79 | } |
80 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
81 | ~__wrap_iter() |
82 | { |
83 | __get_db()->__erase_i(this); |
84 | } |
85 | #endif |
86 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr reference operator*() const _NOEXCEPTnoexcept |
87 | { |
88 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
89 | _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this),((void)0) |
90 | "Attempted to dereference a non-dereferenceable iterator")((void)0); |
91 | #endif |
92 | return *__i; |
93 | } |
94 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr pointer operator->() const _NOEXCEPTnoexcept |
95 | { |
96 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
97 | _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this),((void)0) |
98 | "Attempted to dereference a non-dereferenceable iterator")((void)0); |
99 | #endif |
100 | return _VSTDstd::__1::__to_address(__i); |
101 | } |
102 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator++() _NOEXCEPTnoexcept |
103 | { |
104 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
105 | _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this),((void)0) |
106 | "Attempted to increment a non-incrementable iterator")((void)0); |
107 | #endif |
108 | ++__i; |
109 | return *this; |
110 | } |
111 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator++(int) _NOEXCEPTnoexcept |
112 | {__wrap_iter __tmp(*this); ++(*this); return __tmp;} |
113 | |
114 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator--() _NOEXCEPTnoexcept |
115 | { |
116 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
117 | _LIBCPP_ASSERT(__get_const_db()->__decrementable(this),((void)0) |
118 | "Attempted to decrement a non-decrementable iterator")((void)0); |
119 | #endif |
120 | --__i; |
121 | return *this; |
122 | } |
123 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator--(int) _NOEXCEPTnoexcept |
124 | {__wrap_iter __tmp(*this); --(*this); return __tmp;} |
125 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator+ (difference_type __n) const _NOEXCEPTnoexcept |
126 | {__wrap_iter __w(*this); __w += __n; return __w;} |
127 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator+=(difference_type __n) _NOEXCEPTnoexcept |
128 | { |
129 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
130 | _LIBCPP_ASSERT(__get_const_db()->__addable(this, __n),((void)0) |
131 | "Attempted to add/subtract an iterator outside its valid range")((void)0); |
132 | #endif |
133 | __i += __n; |
134 | return *this; |
135 | } |
136 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator- (difference_type __n) const _NOEXCEPTnoexcept |
137 | {return *this + (-__n);} |
138 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator-=(difference_type __n) _NOEXCEPTnoexcept |
139 | {*this += -__n; return *this;} |
140 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr reference operator[](difference_type __n) const _NOEXCEPTnoexcept |
141 | { |
142 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
143 | _LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n),((void)0) |
144 | "Attempted to subscript an iterator outside its valid range")((void)0); |
145 | #endif |
146 | return __i[__n]; |
147 | } |
148 | |
149 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr iterator_type base() const _NOEXCEPTnoexcept {return __i;} |
150 | |
151 | private: |
152 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
153 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter(const void* __p, iterator_type __x) : __i(__x) |
154 | { |
155 | __get_db()->__insert_ic(this, __p); |
156 | } |
157 | #else |
158 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter(iterator_type __x) _NOEXCEPTnoexcept : __i(__x) {} |
159 | #endif |
160 | |
161 | template <class _Up> friend class __wrap_iter; |
162 | template <class _CharT, class _Traits, class _Alloc> friend class basic_string; |
163 | template <class _Tp, class _Alloc> friend class _LIBCPP_TEMPLATE_VIS__attribute__ ((__type_visibility__("default"))) vector; |
164 | template <class _Tp, size_t> friend class _LIBCPP_TEMPLATE_VIS__attribute__ ((__type_visibility__("default"))) span; |
165 | }; |
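An illustrative sketch of how this wrapper behaves in practice (assuming libc++, where vector<int>::iterator is a __wrap_iter over int*): every operation forwards to the wrapped pointer, plus the optional debug-mode checks above.

    #include <cassert>
    #include <vector>

    void wrap_iter_demo() {
      std::vector<int> v{10, 20, 30};
      auto It = v.begin() + 1;      // under libc++: a __wrap_iter<int*>
      assert(*It == 20);            // operator* returns *__i
      assert(&*It == v.data() + 1); // arithmetic maps directly onto the pointer
    }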
166 | |
167 | template <class _Iter1> |
168 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
169 | bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
170 | { |
171 | return __x.base() == __y.base(); |
172 | } |
173 | |
174 | template <class _Iter1, class _Iter2> |
175 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
176 | bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
177 | { |
178 | return __x.base() == __y.base(); |
179 | } |
180 | |
181 | template <class _Iter1> |
182 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
183 | bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
184 | { |
185 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
186 | _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y),((void)0) |
187 | "Attempted to compare incomparable iterators")((void)0); |
188 | #endif |
189 | return __x.base() < __y.base(); |
190 | } |
191 | |
192 | template <class _Iter1, class _Iter2> |
193 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
194 | bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
195 | { |
196 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
197 | _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y),((void)0) |
198 | "Attempted to compare incomparable iterators")((void)0); |
199 | #endif |
200 | return __x.base() < __y.base(); |
201 | } |
202 | |
203 | template <class _Iter1> |
204 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
205 | bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
206 | { |
207 | return !(__x == __y); |
208 | } |
209 | |
210 | template <class _Iter1, class _Iter2> |
211 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
212 | bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
213 | { |
214 | return !(__x == __y); |
215 | } |
216 | |
217 | template <class _Iter1> |
218 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
219 | bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
220 | { |
221 | return __y < __x; |
222 | } |
223 | |
224 | template <class _Iter1, class _Iter2> |
225 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
226 | bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
227 | { |
228 | return __y < __x; |
229 | } |
230 | |
231 | template <class _Iter1> |
232 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
233 | bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
234 | { |
235 | return !(__x < __y); |
236 | } |
237 | |
238 | template <class _Iter1, class _Iter2> |
239 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
240 | bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
241 | { |
242 | return !(__x < __y); |
243 | } |
244 | |
245 | template <class _Iter1> |
246 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
247 | bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
248 | { |
249 | return !(__y < __x); |
250 | } |
251 | |
252 | template <class _Iter1, class _Iter2> |
253 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
254 | bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
255 | { |
256 | return !(__y < __x); |
257 | } |
258 | |
259 | template <class _Iter1, class _Iter2> |
260 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
261 | #ifndef _LIBCPP_CXX03_LANG |
262 | auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
263 | -> decltype(__x.base() - __y.base()) |
264 | #else |
265 | typename __wrap_iter<_Iter1>::difference_type |
266 | operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
267 | #endif // C++03 |
268 | { |
269 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
270 | _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y),((void)0) |
271 | "Attempted to subtract incompatible iterators")((void)0); |
272 | #endif |
273 | return __x.base() - __y.base(); |
274 | } |
275 | |
276 | template <class _Iter1> |
277 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
278 | __wrap_iter<_Iter1> operator+(typename __wrap_iter<_Iter1>::difference_type __n, __wrap_iter<_Iter1> __x) _NOEXCEPTnoexcept |
279 | { |
280 | __x += __n; |
281 | return __x; |
282 | } |
283 | |
284 | #if _LIBCPP_STD_VER14 <= 17 |
285 | template <class _It> |
286 | struct __is_cpp17_contiguous_iterator<__wrap_iter<_It> > : true_type {}; |
287 | #endif |
288 | |
289 | template <class _Iter> |
290 | _LIBCPP_CONSTEXPRconstexpr |
291 | decltype(_VSTDstd::__1::__to_address(declval<_Iter>())) |
292 | __to_address(__wrap_iter<_Iter> __w) _NOEXCEPTnoexcept { |
293 | return _VSTDstd::__1::__to_address(__w.base()); |
294 | } |
295 | |
296 | _LIBCPP_END_NAMESPACE_STD} } |
297 | |
298 | _LIBCPP_POP_MACROSpop_macro("min") pop_macro("max") |
299 | |
300 | #endif // _LIBCPP___ITERATOR_WRAP_ITER_H |
1 | //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines some vectorizer utilities. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ANALYSIS_VECTORUTILS_H |
14 | #define LLVM_ANALYSIS_VECTORUTILS_H |
15 | |
16 | #include "llvm/ADT/MapVector.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Analysis/LoopAccessAnalysis.h" |
19 | #include "llvm/Support/CheckedArithmetic.h" |
20 | |
21 | namespace llvm { |
22 | class TargetLibraryInfo; |
23 | |
24 | /// Describes the type of Parameters |
25 | enum class VFParamKind { |
26 | Vector, // No semantic information. |
27 | OMP_Linear, // declare simd linear(i) |
28 | OMP_LinearRef, // declare simd linear(ref(i)) |
29 | OMP_LinearVal, // declare simd linear(val(i)) |
30 | OMP_LinearUVal, // declare simd linear(uval(i)) |
31 | OMP_LinearPos, // declare simd linear(i:c) uniform(c) |
32 | OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c) |
33 | OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c) |
34 | OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c) |
35 | OMP_Uniform, // declare simd uniform(i) |
36 | GlobalPredicate, // Global logical predicate that acts on all lanes |
37 | // of the input and output mask concurrently. For |
38 | // example, it is implied by the `M` token in the |
39 | // Vector Function ABI mangled name. |
40 | Unknown |
41 | }; |
42 | |
43 | /// Describes the type of Instruction Set Architecture |
44 | enum class VFISAKind { |
45 | AdvancedSIMD, // AArch64 Advanced SIMD (NEON) |
46 | SVE, // AArch64 Scalable Vector Extension |
47 | SSE, // x86 SSE |
48 | AVX, // x86 AVX |
49 | AVX2, // x86 AVX2 |
50 | AVX512, // x86 AVX512 |
51 | LLVM, // LLVM internal ISA for functions that are not |
52 | // attached to an existing ABI via name mangling. |
53 | Unknown // Unknown ISA |
54 | }; |
55 | |
56 | /// Encapsulates information needed to describe a parameter. |
57 | /// |
58 | /// The description of the parameter is not linked directly to |
59 | /// OpenMP or any other vector function description. This structure |
60 | /// is extendible to handle other paradigms that describe vector |
61 | /// functions and their parameters. |
62 | struct VFParameter { |
63 | unsigned ParamPos; // Parameter Position in Scalar Function. |
64 | VFParamKind ParamKind; // Kind of Parameter. |
65 | int LinearStepOrPos = 0; // Step or Position of the Parameter. |
66 | Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1. |
67 | |
68 | // Comparison operator. |
69 | bool operator==(const VFParameter &Other) const { |
70 | return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) == |
71 | std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos, |
72 | Other.Alignment); |
73 | } |
74 | }; |
75 | |
76 | /// Contains the information about the kind of vectorization |
77 | /// available. |
78 | /// |
79 | /// This object is independent of the paradigm used to |
80 | /// represent vector functions. In particular, it is not attached to |
81 | /// any target-specific ABI. |
82 | struct VFShape { |
83 | ElementCount VF; // Vectorization factor. |
84 | SmallVector<VFParameter, 8> Parameters; // List of parameter information. |
85 | // Comparison operator. |
86 | bool operator==(const VFShape &Other) const { |
87 | return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters); |
88 | } |
89 | |
90 | /// Update the parameter in position P.ParamPos to P. |
91 | void updateParam(VFParameter P) { |
92 | assert(P.ParamPos < Parameters.size() && "Invalid parameter position.")((void)0); |
93 | Parameters[P.ParamPos] = P; |
94 | assert(hasValidParameterList() && "Invalid parameter list")((void)0); |
95 | } |
96 | |
97 | // Retrieve the VFShape that can be used to map a (scalar) function to itself, |
98 | // with VF = 1. |
99 | static VFShape getScalarShape(const CallInst &CI) { |
100 | return VFShape::get(CI, ElementCount::getFixed(1), |
101 | /*HasGlobalPredicate*/ false); |
102 | } |
103 | |
104 | // Retrieve the basic vectorization shape of the function, where all |
105 | // parameters are mapped to VFParamKind::Vector with \p EC |
106 | // lanes. \p HasGlobalPred specifies whether the function has a Global |
107 | // Predicate argument. |
108 | static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) { |
109 | SmallVector<VFParameter, 8> Parameters; |
110 | for (unsigned I = 0; I < CI.arg_size(); ++I) |
111 | Parameters.push_back(VFParameter({I, VFParamKind::Vector})); |
112 | if (HasGlobalPred) |
113 | Parameters.push_back( |
114 | VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate})); |
115 | |
116 | return {EC, Parameters}; |
117 | } |
118 | /// Sanity check on the Parameters in the VFShape. |
119 | bool hasValidParameterList() const; |
120 | }; |
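For example, assuming CI is a call with two arguments, the predicated 4-lane shape produced by VFShape::get maps both arguments to VFParamKind::Vector and appends a trailing GlobalPredicate parameter:

    VFShape S = VFShape::get(CI, ElementCount::getFixed(4),
                             /*HasGlobalPred=*/true);
    // S.VF == ElementCount::getFixed(4)
    // S.Parameters == { {0, Vector}, {1, Vector}, {2, GlobalPredicate} }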
121 | |
122 | /// Holds the VFShape for a specific scalar to vector function mapping. |
123 | struct VFInfo { |
124 | VFShape Shape; /// Classification of the vector function. |
125 | std::string ScalarName; /// Scalar Function Name. |
126 | std::string VectorName; /// Vector Function Name associated to this VFInfo. |
127 | VFISAKind ISA; /// Instruction Set Architecture. |
128 | }; |
129 | |
130 | namespace VFABI { |
131 | /// LLVM Internal VFABI ISA token for vector functions. |
132 | static constexpr char const *_LLVM_ = "_LLVM_"; |
133 | /// Prefix for internal name redirection for vector function that |
134 | /// tells the compiler to scalarize the call using the scalar name |
135 | /// of the function. For example, a mangled name like |
136 | /// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the |
137 | /// vectorizer to vectorize the scalar call `foo`, and to scalarize |
138 | /// it once vectorization is done. |
139 | static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_"; |
140 | |
141 | /// Function to construct a VFInfo out of a mangled name in the |
142 | /// following format: |
143 | /// |
144 | /// <VFABI_name>{(<redirection>)} |
145 | /// |
146 | /// where <VFABI_name> is the name of the vector function, mangled according |
147 | /// to the rules described in the Vector Function ABI of the target vector |
148 | /// extension (or <isa> from now on). The <VFABI_name> is in the following |
149 | /// format: |
150 | /// |
151 | /// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] |
152 | /// |
153 | /// This method supports demangling rules for the following <isa>: |
154 | /// |
155 | /// * AArch64: https://developer.arm.com/docs/101129/latest |
156 | /// |
157 | /// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and |
158 | /// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt |
159 | /// |
160 | /// \param MangledName -> input string in the format |
161 | /// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]. |
162 | /// \param M -> Module used to retrieve information about the vector |
163 | /// function that cannot be retrieved from the mangled |
164 | /// name. At the moment, this parameter is needed only to retrieve the |
165 | /// Vectorization Factor of scalable vector functions from their |
166 | /// respective IR declarations. |
167 | Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M); |
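A demangling sketch for an LLVM-internal name (the symbols are illustrative; M is assumed to be the enclosing Module and to declare "foo_vec2"):

    Optional<VFInfo> Info =
        VFABI::tryDemangleForVFABI("_ZGV_LLVM_N2v_foo(foo_vec2)", M);
    // On success: Info->ScalarName == "foo", Info->VectorName == "foo_vec2",
    // Info->Shape.VF == ElementCount::getFixed(2), and the single parameter
    // is VFParamKind::Vector.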
168 | |
169 | /// This routine mangles the given VectorName according to the LangRef |
170 | /// specification for vector-function-abi-variant attribute and is specific to |
171 | /// the TLI mappings. It is the responsibility of the caller to make sure that |
172 | /// this is only used if all parameters in the vector function are vector type. |
173 | /// This returned string holds scalar-to-vector mapping: |
174 | /// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>) |
175 | /// |
176 | /// where: |
177 | /// |
178 | /// <isa> = "_LLVM_" |
179 | /// <mask> = "N". Note: TLI does not support masked interfaces. |
180 | /// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor` |
181 | /// field of the `VecDesc` struct. If the number of lanes is scalable |
182 | /// then 'x' is printed instead. |
183 | /// <vparams> = "v", as many as are the numArgs. |
184 | /// <scalarname> = the name of the scalar function. |
185 | /// <vectorname> = the name of the vector function. |
186 | std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, |
187 | unsigned numArgs, ElementCount VF); |
188 | |
189 | /// Retrieve the `VFParamKind` from a string token. |
190 | VFParamKind getVFParamKindFromString(const StringRef Token); |
191 | |
192 | // Name of the attribute where the variant mappings are stored. |
193 | static constexpr char const *MappingsAttrName = "vector-function-abi-variant"; |
194 | |
195 | /// Populates a set of strings representing the Vector Function ABI variants |
196 | /// associated to the CallInst CI. If the CI does not contain the |
197 | /// vector-function-abi-variant attribute, we return without populating |
198 | /// VariantMappings, i.e. callers of getVectorVariantNames need not check for |
199 | /// the presence of the attribute (see InjectTLIMappings). |
200 | void getVectorVariantNames(const CallInst &CI, |
201 | SmallVectorImpl<std::string> &VariantMappings); |
202 | } // end namespace VFABI |
203 | |
204 | /// The Vector Function Database. |
205 | /// |
206 | /// Helper class used to find the vector functions associated to a |
207 | /// scalar CallInst. |
208 | class VFDatabase { |
209 | /// The Module of the CallInst CI. |
210 | const Module *M; |
211 | /// The CallInst instance being queried for scalar to vector mappings. |
212 | const CallInst &CI; |
213 | /// List of vector functions descriptors associated to the call |
214 | /// instruction. |
215 | const SmallVector<VFInfo, 8> ScalarToVectorMappings; |
216 | |
217 | /// Retrieve the scalar-to-vector mappings associated to the rules of |
218 | /// a Vector Function ABI. |
219 | static void getVFABIMappings(const CallInst &CI, |
220 | SmallVectorImpl<VFInfo> &Mappings) { |
221 | if (!CI.getCalledFunction()) |
222 | return; |
223 | |
224 | const StringRef ScalarName = CI.getCalledFunction()->getName(); |
225 | |
226 | SmallVector<std::string, 8> ListOfStrings; |
227 | // The check for the vector-function-abi-variant attribute is done when |
228 | // retrieving the vector variant names here. |
229 | VFABI::getVectorVariantNames(CI, ListOfStrings); |
230 | if (ListOfStrings.empty()) |
231 | return; |
232 | for (const auto &MangledName : ListOfStrings) { |
233 | const Optional<VFInfo> Shape = |
234 | VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule())); |
235 | // A match is found via scalar and vector names, and also by |
236 | // ensuring that the variant described in the attribute has a |
237 | // corresponding definition or declaration of the vector |
238 | // function in the Module M. |
239 | if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) { |
240 | assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
241 | "Vector function is missing.");
242 | Mappings.push_back(Shape.getValue()); |
243 | } |
244 | } |
245 | } |
246 | |
247 | public: |
248 | /// Retrieve all the VFInfo instances associated to the CallInst CI. |
249 | static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) { |
250 | SmallVector<VFInfo, 8> Ret; |
251 | |
252 | // Get mappings from the Vector Function ABI variants. |
253 | getVFABIMappings(CI, Ret); |
254 | |
255 | // Other non-VFABI variants should be retrieved here. |
256 | |
257 | return Ret; |
258 | } |
259 | |
260 | /// Constructor, requires a CallInst instance. |
261 | VFDatabase(CallInst &CI) |
262 | : M(CI.getModule()), CI(CI), |
263 | ScalarToVectorMappings(VFDatabase::getMappings(CI)) {} |
264 | /// \defgroup VFDatabase query interface. |
265 | /// |
266 | /// @{ |
267 | /// Retrieve the Function with VFShape \p Shape. |
268 | Function *getVectorizedFunction(const VFShape &Shape) const { |
269 | if (Shape == VFShape::getScalarShape(CI)) |
270 | return CI.getCalledFunction(); |
271 | |
272 | for (const auto &Info : ScalarToVectorMappings) |
273 | if (Info.Shape == Shape) |
274 | return M->getFunction(Info.VectorName); |
275 | |
276 | return nullptr; |
277 | } |
278 | /// @} |
279 | }; |
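// A minimal usage sketch of the database above. The function name is
// illustrative, and it assumes `Call` targets a function carrying the
// "vector-function-abi-variant" attribute and that VFShape::get (declared
// earlier in this header) is available.
inline Function *sketchLookupFourLaneVariant(CallInst &Call) {
  VFDatabase DB(Call);
  // Ask for an unmasked 4-lane shape of the same call.
  VFShape Shape =
      VFShape::get(Call, ElementCount::getFixed(4), /*HasGlobalPred=*/false);
  // Returns the vector Function declared/defined in the module, or nullptr
  // if no such mapping was recorded.
  return DB.getVectorizedFunction(Shape);
}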
280 | |
281 | template <typename T> class ArrayRef; |
282 | class DemandedBits; |
283 | class GetElementPtrInst; |
284 | template <typename InstTy> class InterleaveGroup; |
285 | class IRBuilderBase; |
286 | class Loop; |
287 | class ScalarEvolution; |
288 | class TargetTransformInfo; |
289 | class Type; |
290 | class Value; |
291 | |
292 | namespace Intrinsic { |
293 | typedef unsigned ID; |
294 | } |
295 | |
296 | /// A helper function for converting Scalar types to vector types. If |
297 | /// the incoming type is void, we return void. If the EC represents a |
298 | /// scalar, we return the scalar type. |
299 | inline Type *ToVectorTy(Type *Scalar, ElementCount EC) { |
300 | if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar()) |
301 | return Scalar; |
302 | return VectorType::get(Scalar, EC); |
303 | } |
304 | |
305 | inline Type *ToVectorTy(Type *Scalar, unsigned VF) { |
306 | return ToVectorTy(Scalar, ElementCount::getFixed(VF)); |
307 | } |
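// For illustration (Ctx being some LLVMContext):
//   ToVectorTy(Type::getInt32Ty(Ctx), 4)              --> <4 x i32>
//   ToVectorTy(Type::getInt32Ty(Ctx),
//              ElementCount::getScalable(2))          --> <vscale x 2 x i32>
//   ToVectorTy(Type::getVoidTy(Ctx), 4)               --> void, returned as-is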
308 | |
309 | /// Identify if the intrinsic is trivially vectorizable. |
310 | /// This method returns true if the intrinsic's argument types are all scalars |
311 | /// for the scalar form of the intrinsic and all vectors (or scalars handled by |
312 | /// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic. |
313 | bool isTriviallyVectorizable(Intrinsic::ID ID); |
314 | |
315 | /// Identifies if the vector form of the intrinsic has a scalar operand. |
316 | bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx); |
317 | |
318 | /// Identifies if the vector form of the intrinsic has a scalar operand that has |
319 | /// an overloaded type. |
320 | bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, |
321 | unsigned ScalarOpdIdx); |
322 | |
323 | /// Returns the intrinsic ID for the call.
324 | /// For the given call instruction it finds the corresponding intrinsic and
325 | /// returns its intrinsic ID; if no mapping is found, it returns not_intrinsic.
326 | Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, |
327 | const TargetLibraryInfo *TLI); |
328 | |
329 | /// Find the operand of the GEP that should be checked for consecutive |
330 | /// stores. This ignores trailing indices that have no effect on the final |
331 | /// pointer. |
332 | unsigned getGEPInductionOperand(const GetElementPtrInst *Gep); |
333 | |
334 | /// If the argument is a GEP, then returns the operand identified by |
335 | /// getGEPInductionOperand. However, if there is some other non-loop-invariant |
336 | /// operand, it returns that instead. |
337 | Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp); |
338 | |
339 | /// If a value has only one user that is a CastInst, return it. |
340 | Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty); |
341 | |
342 | /// Get the stride of a pointer access in a loop. Looks for symbolic |
343 | /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. |
344 | Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp); |
345 | |
346 | /// Given a vector and an element number, see if the scalar value is |
347 | /// already around as a register, for example if it were inserted then extracted |
348 | /// from the vector. |
349 | Value *findScalarElement(Value *V, unsigned EltNo); |
350 | |
351 | /// If all non-negative \p Mask elements are the same value, return that value. |
352 | /// If all elements are negative (undefined) or \p Mask contains different |
353 | /// non-negative values, return -1. |
354 | int getSplatIndex(ArrayRef<int> Mask); |
355 | |
356 | /// Get splat value if the input is a splat vector or return nullptr. |
357 | /// The value may be extracted from a splat constants vector or from |
358 | /// a sequence of instructions that broadcast a single value into a vector. |
359 | Value *getSplatValue(const Value *V); |
360 | |
361 | /// Return true if each element of the vector value \p V is poisoned or equal to |
362 | /// every other non-poisoned element. If an index element is specified, either |
363 | /// every element of the vector is poisoned or the element at that index is not |
364 | /// poisoned and equal to every other non-poisoned element. |
365 | /// This may be more powerful than the related getSplatValue() because it is |
366 | /// not limited by finding a scalar source value to a splatted vector. |
367 | bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0); |
368 | |
369 | /// Replace each shuffle mask index with the scaled sequential indices for an |
370 | /// equivalent mask of narrowed elements. Mask elements that are less than 0 |
371 | /// (sentinel values) are repeated in the output mask. |
372 | /// |
373 | /// Example with Scale = 4: |
374 | /// <4 x i32> <3, 2, 0, -1> --> |
375 | /// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> |
376 | /// |
377 | /// This is the reverse process of widening shuffle mask elements, but it always |
378 | /// succeeds because the indexes can always be multiplied (scaled up) to map to |
379 | /// narrower vector elements. |
380 | void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, |
381 | SmallVectorImpl<int> &ScaledMask); |
382 | |
383 | /// Try to transform a shuffle mask by replacing elements with the scaled index |
384 | /// for an equivalent mask of widened elements. If all mask elements that would |
385 | /// map to a wider element of the new mask are the same negative number |
386 | /// (sentinel value), that element of the new mask is the same value. If any |
387 | /// element in a given slice is negative and some other element in that slice is |
388 | /// not the same value, return false (partial matches with sentinel values are |
389 | /// not allowed). |
390 | /// |
391 | /// Example with Scale = 4: |
392 | /// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> --> |
393 | /// <4 x i32> <3, 2, 0, -1> |
394 | /// |
395 | /// This is the reverse process of narrowing shuffle mask elements if it |
396 | /// succeeds. This transform is not always possible because indexes may not |
397 | /// divide evenly (scale down) to map to wider vector elements. |
398 | bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask, |
399 | SmallVectorImpl<int> &ScaledMask); |
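// A sketch of the round trip between narrowShuffleMaskElts and
// widenShuffleMaskElts, using the documented example (the helper name is
// illustrative; SmallVector/ArrayRef are assumed available, as elsewhere in
// this header):
inline bool sketchMaskNarrowWidenRoundTrip() {
  SmallVector<int, 4> Original = {3, 2, 0, -1};
  SmallVector<int, 16> Narrow;
  narrowShuffleMaskElts(4, Original, Narrow);
  // Narrow == {12,13,14,15, 8,9,10,11, 0,1,2,3, -1,-1,-1,-1}
  SmallVector<int, 4> Widened;
  bool Succeeded = widenShuffleMaskElts(4, Narrow, Widened);
  // Succeeded is true and Widened == {3, 2, 0, -1} again.
  return Succeeded;
}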
400 | |
401 | /// Compute a map of integer instructions to their minimum legal type |
402 | /// size. |
403 | /// |
404 | /// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int |
405 | /// type (e.g. i32) whenever arithmetic is performed on them. |
406 | /// |
407 | /// For targets with native i8 or i16 operations, usually InstCombine can shrink |
408 | /// the arithmetic type down again. However InstCombine refuses to create |
409 | /// illegal types, so for targets without i8 or i16 registers, the lengthening |
410 | /// and shrinking remain.
411 | /// |
412 | /// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when |
413 | /// their scalar equivalents do not, so during vectorization it is important to |
414 | /// remove these extensions and truncations when deciding the profitability of
415 | /// vectorization. |
416 | /// |
417 | /// This function analyzes the given range of instructions and determines the |
418 | /// minimum type size each can be converted to. It attempts to remove or |
419 | /// minimize type size changes across each def-use chain, so for example in the |
420 | /// following code: |
421 | /// |
422 | /// %1 = load i8, i8* |
423 | /// %2 = add i8 %1, 2 |
424 | /// %3 = load i16, i16* |
425 | /// %4 = zext i8 %2 to i32 |
426 | /// %5 = zext i16 %3 to i32 |
427 | /// %6 = add i32 %4, %5 |
428 | /// %7 = trunc i32 %6 to i16 |
429 | /// |
430 | /// Instruction %6 must be done at least in i16, so computeMinimumValueSizes |
431 | /// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}. |
432 | /// |
433 | /// If the optional TargetTransformInfo is provided, this function tries harder |
434 | /// to do less work by only looking at illegal types. |
435 | MapVector<Instruction*, uint64_t> |
436 | computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks, |
437 | DemandedBits &DB, |
438 | const TargetTransformInfo *TTI=nullptr); |
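// A typical call, sketched (the Loop and analyses come from the caller's own
// pass pipeline; TheLoop, DB and TTI are illustrative names):
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(TheLoop->getBlocks(), DB, &TTI);
//   // For the IR in the example above, each of %1..%7 maps to 16, i.e. the
//   // whole chain can be evaluated in i16 lanes.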
439 | |
440 | /// Compute the union of two access-group lists. |
441 | /// |
442 | /// If the list contains just one access group, it is returned directly. If the |
443 | /// list is empty, returns nullptr. |
444 | MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2); |
445 | |
446 | /// Compute the access-group list of access groups that @p Inst1 and @p Inst2 |
447 | /// are both in. If either instruction does not access memory at all, it is |
448 | /// considered to be in every list. |
449 | /// |
450 | /// If the list contains just one access group, it is returned directly. If the |
451 | /// list is empty, returns nullptr. |
452 | MDNode *intersectAccessGroups(const Instruction *Inst1, |
453 | const Instruction *Inst2); |
454 | |
455 | /// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, |
456 | /// MD_nontemporal, MD_access_group]. |
457 | /// For K in Kinds, we get the MDNode for K from each of the |
458 | /// elements of VL, compute their "intersection" (i.e., the most generic |
459 | /// metadata value that covers all of the individual values), and set I's |
460 | /// metadata for M equal to the intersection value. |
461 | /// |
462 | /// This function always sets a (possibly null) value for each K in Kinds. |
463 | Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL); |
464 | |
465 | /// Create a mask that filters the members of an interleave group where there |
466 | /// are gaps. |
467 | /// |
468 | /// For example, the mask for \p Group with interleave-factor 3 |
469 | /// and \p VF 4, that has only its first member present is: |
470 | /// |
471 | /// <1,0,0,1,0,0,1,0,0,1,0,0> |
472 | /// |
473 | /// Note: The result is a mask of 0's and 1's, as opposed to the other |
474 | /// create[*]Mask() utilities which create a shuffle mask (mask that |
475 | /// consists of indices). |
476 | Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, |
477 | const InterleaveGroup<Instruction> &Group); |
478 | |
479 | /// Create a mask with replicated elements. |
480 | /// |
481 | /// This function creates a shuffle mask for replicating each of the \p VF |
482 | /// elements in a vector \p ReplicationFactor times. It can be used to |
483 | /// transform a mask of \p VF elements into a mask of |
484 | /// \p VF * \p ReplicationFactor elements used by a predicated |
485 | /// interleaved group of loads/stores whose interleave factor ==
486 | /// \p ReplicationFactor. |
487 | /// |
488 | /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: |
489 | /// |
490 | /// <0,0,0,1,1,1,2,2,2,3,3,3> |
491 | llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor, |
492 | unsigned VF); |
493 | |
494 | /// Create an interleave shuffle mask. |
495 | /// |
496 | /// This function creates a shuffle mask for interleaving \p NumVecs vectors of |
497 | /// vectorization factor \p VF into a single wide vector. The mask is of the |
498 | /// form: |
499 | /// |
500 | /// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...> |
501 | /// |
502 | /// For example, the mask for VF = 4 and NumVecs = 2 is: |
503 | /// |
504 | /// <0, 4, 1, 5, 2, 6, 3, 7>. |
505 | llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs); |
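// For instance, createInterleaveMask(4, 2) == {0, 4, 1, 5, 2, 6, 3, 7}; used
// as a shufflevector mask it interleaves two <4 x T> operands lane by lane:
//
//   %wide = shufflevector <4 x i32> %a, <4 x i32> %b,
//                         <8 x i32> <i32 0, i32 4, i32 1, i32 5,
//                                    i32 2, i32 6, i32 3, i32 7>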
506 | |
507 | /// Create a stride shuffle mask. |
508 | /// |
509 | /// This function creates a shuffle mask whose elements begin at \p Start and |
510 | /// are incremented by \p Stride. The mask can be used to deinterleave an |
511 | /// interleaved vector into separate vectors of vectorization factor \p VF. The |
512 | /// mask is of the form: |
513 | /// |
514 | /// <Start, Start + Stride, ..., Start + Stride * (VF - 1)> |
515 | /// |
516 | /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is: |
517 | /// |
518 | /// <0, 2, 4, 6> |
519 | llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride, |
520 | unsigned VF); |
521 | |
522 | /// Create a sequential shuffle mask. |
523 | /// |
524 | /// This function creates a shuffle mask whose elements are sequential and begin
525 | /// at \p Start. The mask contains \p NumInts integers and is padded with \p |
526 | /// NumUndefs undef values. The mask is of the form: |
527 | /// |
528 | /// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs> |
529 | /// |
530 | /// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
531 | /// |
532 | /// <0, 1, 2, 3, undef, undef, undef, undef> |
533 | llvm::SmallVector<int, 16> |
534 | createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs); |
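// Conversely, createStrideMask(0, 2, 4) == {0, 2, 4, 6} picks the even lanes
// back out of an 8-wide value, and createSequentialMask(0, 4, 4) yields
// {0, 1, 2, 3} followed by four undef lanes (encoded as -1 in these masks):
//
//   %even = shufflevector <8 x i32> %wide, <8 x i32> undef,
//                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>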
535 | |
536 | /// Concatenate a list of vectors. |
537 | /// |
538 | /// This function generates code that concatenates the vectors in \p Vecs into a
539 | /// single large vector. The number of vectors should be greater than one, and |
540 | /// their element types should be the same. The number of elements in the |
541 | /// vectors should also be the same; however, if the last vector has fewer |
542 | /// elements, it will be padded with undefs. |
543 | Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs); |
544 | |
545 | /// Given a mask vector of i1, return true if all of the elements of this
546 | /// predicate mask are known to be false or undef. That is, return true if all |
547 | /// lanes can be assumed inactive. |
548 | bool maskIsAllZeroOrUndef(Value *Mask); |
549 | |
550 | /// Given a mask vector of i1, return true if all of the elements of this
551 | /// predicate mask are known to be true or undef. That is, return true if all |
552 | /// lanes can be assumed active. |
553 | bool maskIsAllOneOrUndef(Value *Mask); |
554 | |
555 | /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) |
556 | /// for each lane which may be active. |
557 | APInt possiblyDemandedEltsInMask(Value *Mask); |
558 | |
559 | /// The group of interleaved loads/stores sharing the same stride and |
560 | /// close to each other. |
561 | /// |
562 | /// Each member in this group has an index starting from 0, and the largest |
563 | /// index should be less than the interleave factor, which is equal to the absolute
564 | /// value of the access's stride. |
565 | /// |
566 | /// E.g. An interleaved load group of factor 4: |
567 | /// for (unsigned i = 0; i < 1024; i+=4) { |
568 | /// a = A[i]; // Member of index 0 |
569 | /// b = A[i+1]; // Member of index 1 |
570 | /// d = A[i+3]; // Member of index 3 |
571 | /// ... |
572 | /// } |
573 | /// |
574 | /// An interleaved store group of factor 4: |
575 | /// for (unsigned i = 0; i < 1024; i+=4) { |
576 | /// ... |
577 | /// A[i] = a; // Member of index 0 |
578 | /// A[i+1] = b; // Member of index 1 |
579 | /// A[i+2] = c; // Member of index 2 |
580 | /// A[i+3] = d; // Member of index 3 |
581 | /// } |
582 | /// |
583 | /// Note: the interleaved load group could have gaps (missing members), but |
584 | /// the interleaved store group doesn't allow gaps. |
585 | template <typename InstTy> class InterleaveGroup { |
586 | public: |
587 | InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment) |
588 | : Factor(Factor), Reverse(Reverse), Alignment(Alignment), |
589 | InsertPos(nullptr) {} |
590 | |
591 | InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment) |
592 | : Alignment(Alignment), InsertPos(Instr) { |
593 | Factor = std::abs(Stride); |
594 | assert(Factor > 1 && "Invalid interleave factor");
595 | |
596 | Reverse = Stride < 0; |
597 | Members[0] = Instr; |
598 | } |
599 | |
600 | bool isReverse() const { return Reverse; } |
601 | uint32_t getFactor() const { return Factor; } |
602 | Align getAlign() const { return Alignment; } |
603 | uint32_t getNumMembers() const { return Members.size(); } |
604 | |
605 | /// Try to insert a new member \p Instr with index \p Index and |
606 | /// alignment \p NewAlign. The index is relative to the current leader and may
607 | /// be negative if \p Instr becomes the new leader.
608 | /// |
609 | /// \returns false if the instruction doesn't belong to the group. |
610 | bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) { |
611 | // Make sure the key fits in an int32_t. |
612 | Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey); |
613 | if (!MaybeKey) |
614 | return false; |
615 | int32_t Key = *MaybeKey; |
616 | |
617 | // Skip if the key is used for either the tombstone or empty special values. |
618 | if (DenseMapInfo<int32_t>::getTombstoneKey() == Key || |
619 | DenseMapInfo<int32_t>::getEmptyKey() == Key) |
620 | return false; |
621 | |
622 | // Skip if there is already a member with the same index. |
623 | if (Members.find(Key) != Members.end()) |
624 | return false; |
625 | |
626 | if (Key > LargestKey) { |
627 | // The largest index is always less than the interleave factor. |
628 | if (Index >= static_cast<int32_t>(Factor)) |
629 | return false; |
630 | |
631 | LargestKey = Key; |
632 | } else if (Key < SmallestKey) { |
633 | |
634 | // Make sure the largest index fits in an int32_t. |
635 | Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key); |
636 | if (!MaybeLargestIndex) |
637 | return false; |
638 | |
639 | // The largest index is always less than the interleave factor. |
640 | if (*MaybeLargestIndex >= static_cast<int64_t>(Factor)) |
641 | return false; |
642 | |
643 | SmallestKey = Key; |
644 | } |
645 | |
646 | // It's always safe to select the minimum alignment. |
647 | Alignment = std::min(Alignment, NewAlign); |
648 | Members[Key] = Instr; |
649 | return true; |
650 | } |
651 | |
652 | /// Get the member with the given index \p Index |
653 | /// |
654 | /// \returns nullptr if the group contains no such member.
655 | InstTy *getMember(uint32_t Index) const { |
656 | int32_t Key = SmallestKey + Index; |
657 | return Members.lookup(Key); |
658 | } |
659 | |
660 | /// Get the index for the given member. Unlike the key in the member |
661 | /// map, the index starts from 0. |
662 | uint32_t getIndex(const InstTy *Instr) const { |
663 | for (auto I : Members) { |
664 | if (I.second == Instr) |
665 | return I.first - SmallestKey; |
666 | } |
667 | |
668 | llvm_unreachable("InterleaveGroup contains no such member");
669 | } |
670 | |
671 | InstTy *getInsertPos() const { return InsertPos; } |
672 | void setInsertPos(InstTy *Inst) { InsertPos = Inst; } |
673 | |
674 | /// Add metadata (e.g. alias info) from the instructions in this group to \p |
675 | /// NewInst. |
676 | /// |
677 | /// FIXME: this function currently does not add noalias metadata a la
678 | /// addNewMedata. To do that we need to compute the intersection of the |
679 | /// noalias info from all members. |
680 | void addMetadata(InstTy *NewInst) const; |
681 | |
682 | /// Returns true if this Group requires a scalar iteration to handle gaps. |
683 | bool requiresScalarEpilogue() const { |
684 | // If the last member of the Group exists, then a scalar epilog is not |
685 | // needed for this group. |
686 | if (getMember(getFactor() - 1)) |
687 | return false; |
688 | |
689 | // We have a group with gaps. It therefore cannot be a group of stores, |
690 | // and it can't be a reversed access, because such groups get invalidated. |
691 | assert(!getMember(0)->mayWriteToMemory() &&
692 | "Group should have been invalidated");
693 | assert(!isReverse() && "Group should have been invalidated");
694 | |
695 | // This is a group of loads, with gaps, and without a last member.
696 | return true; |
697 | } |
698 | |
699 | private: |
700 | uint32_t Factor; // Interleave Factor. |
701 | bool Reverse; |
702 | Align Alignment; |
703 | DenseMap<int32_t, InstTy *> Members; |
704 | int32_t SmallestKey = 0; |
705 | int32_t LargestKey = 0; |
706 | |
707 | // To avoid breaking dependences, vectorized instructions of an interleave |
708 | // group should be inserted at either the first load or the last store in |
709 | // program order. |
710 | // |
711 | // E.g. %even = load i32 // Insert Position |
712 | // %add = add i32 %even // Use of %even |
713 | // %odd = load i32 |
714 | // |
715 | // store i32 %even |
716 | // %odd = add i32 // Def of %odd |
717 | // store i32 %odd // Insert Position |
718 | InstTy *InsertPos; |
719 | }; |
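// A sketch of how a stride-4 load group with a gap is assembled (the function
// name is illustrative; real construction happens in InterleavedAccessInfo
// below). A0, A1 and A3 are assumed to be the loads of A[i], A[i+1] and
// A[i+3] from the example above.
inline void sketchBuildLoadGroup(Instruction *A0, Instruction *A1,
                                 Instruction *A3) {
  InterleaveGroup<Instruction> Group(A0, /*Stride=*/4, Align(4));
  Group.insertMember(A1, /*Index=*/1, Align(4)); // index relative to A0
  Group.insertMember(A3, /*Index=*/3, Align(4));
  // Group.getMember(2) is now nullptr: the gap at A[i+2]. Because the last
  // member (index Factor - 1 == 3) is present, requiresScalarEpilogue() is
  // false for this particular group.
}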
720 | |
721 | /// Drive the analysis of interleaved memory accesses in the loop. |
722 | /// |
723 | /// Use this class to analyze interleaved accesses only when we can vectorize |
724 | /// a loop. Otherwise it's meaningless to do analysis as the vectorization |
725 | /// on interleaved accesses is unsafe. |
726 | /// |
727 | /// The analysis collects interleave groups and records the relationships |
728 | /// between the member and the group in a map. |
729 | class InterleavedAccessInfo { |
730 | public: |
731 | InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, |
732 | DominatorTree *DT, LoopInfo *LI, |
733 | const LoopAccessInfo *LAI) |
734 | : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} |
735 | |
736 | ~InterleavedAccessInfo() { invalidateGroups(); } |
737 | |
738 | /// Analyze the interleaved accesses and collect them in interleave |
739 | /// groups. Substitute symbolic strides using \p Strides. |
740 | /// Consider also predicated loads/stores in the analysis if |
741 | /// \p EnableMaskedInterleavedGroup is true. |
742 | void analyzeInterleaving(bool EnableMaskedInterleavedGroup); |
743 | |
744 | /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
745 | /// contrary to the original assumption. Although we currently prevent group
746 | /// formation for predicated accesses, we may be able to relax this limitation |
747 | /// in the future once we handle more complicated blocks. Returns true if any |
748 | /// groups were invalidated. |
749 | bool invalidateGroups() { |
750 | if (InterleaveGroups.empty()) { |
751 | assert(
752 | !RequiresScalarEpilogue &&
753 | "RequiresScalarEpilog should not be set without interleave groups");
754 | return false; |
755 | } |
756 | |
757 | InterleaveGroupMap.clear(); |
758 | for (auto *Ptr : InterleaveGroups) |
759 | delete Ptr; |
760 | InterleaveGroups.clear(); |
761 | RequiresScalarEpilogue = false; |
762 | return true; |
763 | } |
764 | |
765 | /// Check if \p Instr belongs to any interleave group. |
766 | bool isInterleaved(Instruction *Instr) const { |
767 | return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end(); |
768 | } |
769 | |
770 | /// Get the interleave group that \p Instr belongs to. |
771 | /// |
772 | /// \returns nullptr if \p Instr does not belong to any interleave group.
773 | InterleaveGroup<Instruction> * |
774 | getInterleaveGroup(const Instruction *Instr) const { |
775 | return InterleaveGroupMap.lookup(Instr); |
776 | } |
777 | |
778 | iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>> |
779 | getInterleaveGroups() { |
780 | return make_range(InterleaveGroups.begin(), InterleaveGroups.end()); |
781 | } |
782 | |
783 | /// Returns true if an interleaved group that may access memory |
784 | /// out-of-bounds requires a scalar epilogue iteration for correctness. |
785 | bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } |
786 | |
787 | /// Invalidate groups that require a scalar epilogue (due to gaps). This can |
788 | /// happen when optimizing for size forbids a scalar epilogue, and the gap |
789 | /// cannot be filtered by masking the load/store. |
790 | void invalidateGroupsRequiringScalarEpilogue(); |
791 | |
792 | private: |
793 | /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. |
794 | /// Simplifies SCEV expressions in the context of existing SCEV assumptions. |
795 | /// The interleaved access analysis can also add new predicates (for example |
796 | /// by versioning strides of pointers). |
797 | PredicatedScalarEvolution &PSE; |
798 | |
799 | Loop *TheLoop; |
800 | DominatorTree *DT; |
801 | LoopInfo *LI; |
802 | const LoopAccessInfo *LAI; |
803 | |
804 | /// True if the loop may contain non-reversed interleaved groups with |
805 | /// out-of-bounds accesses. We ensure we don't speculatively access memory |
806 | /// out-of-bounds by executing at least one scalar epilogue iteration. |
807 | bool RequiresScalarEpilogue = false; |
808 | |
809 | /// Holds the relationships between the members and the interleave group. |
810 | DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap; |
811 | |
812 | SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups; |
813 | |
814 | /// Holds dependences among the memory accesses in the loop. It maps a source |
815 | /// access to a set of dependent sink accesses. |
816 | DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences; |
817 | |
818 | /// The descriptor for a strided memory access. |
819 | struct StrideDescriptor { |
820 | StrideDescriptor() = default; |
821 | StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, |
822 | Align Alignment) |
823 | : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {} |
824 | |
825 | // The access's stride. It is negative for a reverse access. |
826 | int64_t Stride = 0; |
827 | |
828 | // The scalar expression of this access. |
829 | const SCEV *Scev = nullptr; |
830 | |
831 | // The size of the memory object. |
832 | uint64_t Size = 0; |
833 | |
834 | // The alignment of this access. |
835 | Align Alignment; |
836 | }; |
837 | |
838 | /// A type for holding instructions and their stride descriptors. |
839 | using StrideEntry = std::pair<Instruction *, StrideDescriptor>; |
840 | |
841 | /// Create a new interleave group with the given instruction \p Instr, |
842 | /// stride \p Stride and alignment \p Align. |
843 | /// |
844 | /// \returns the newly created interleave group. |
845 | InterleaveGroup<Instruction> * |
846 | createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) { |
847 | assert(!InterleaveGroupMap.count(Instr) &&
848 | "Already in an interleaved access group");
849 | InterleaveGroupMap[Instr] = |
850 | new InterleaveGroup<Instruction>(Instr, Stride, Alignment); |
851 | InterleaveGroups.insert(InterleaveGroupMap[Instr]); |
852 | return InterleaveGroupMap[Instr]; |
853 | } |
854 | |
855 | /// Release the group and remove all the relationships. |
856 | void releaseGroup(InterleaveGroup<Instruction> *Group) { |
857 | for (unsigned i = 0; i < Group->getFactor(); i++) |
858 | if (Instruction *Member = Group->getMember(i)) |
859 | InterleaveGroupMap.erase(Member); |
860 | |
861 | InterleaveGroups.erase(Group); |
862 | delete Group; |
863 | } |
864 | |
865 | /// Collect all the accesses with a constant stride in program order. |
866 | void collectConstStrideAccesses( |
867 | MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo, |
868 | const ValueToValueMap &Strides); |
869 | |
870 | /// Returns true if \p Stride is allowed in an interleaved group. |
871 | static bool isStrided(int Stride); |
872 | |
873 | /// Returns true if \p BB is a predicated block. |
874 | bool isPredicated(BasicBlock *BB) const { |
875 | return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); |
876 | } |
877 | |
878 | /// Returns true if LoopAccessInfo can be used for dependence queries. |
879 | bool areDependencesValid() const { |
880 | return LAI && LAI->getDepChecker().getDependences(); |
881 | } |
882 | |
883 | /// Returns true if memory accesses \p A and \p B can be reordered, if |
884 | /// necessary, when constructing interleaved groups. |
885 | /// |
886 | /// \p A must precede \p B in program order. We return false if reordering is |
887 | /// not necessary or is prevented because \p A and \p B may be dependent. |
888 | bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A, |
889 | StrideEntry *B) const { |
890 | // Code motion for interleaved accesses can potentially hoist strided loads |
891 | // and sink strided stores. The code below checks the legality of the |
892 | // following two conditions: |
893 | // |
894 | // 1. Potentially moving a strided load (B) before any store (A) that |
895 | // precedes B, or |
896 | // |
897 | // 2. Potentially moving a strided store (A) after any load or store (B) |
898 | // that A precedes. |
899 | // |
900 | // It's legal to reorder A and B if we know there isn't a dependence from A |
901 | // to B. Note that this determination is conservative since some |
902 | // dependences could potentially be reordered safely. |
903 | |
904 | // A is potentially the source of a dependence. |
905 | auto *Src = A->first; |
906 | auto SrcDes = A->second; |
907 | |
908 | // B is potentially the sink of a dependence. |
909 | auto *Sink = B->first; |
910 | auto SinkDes = B->second; |
911 | |
912 | // Code motion for interleaved accesses can't violate WAR dependences. |
913 | // Thus, reordering is legal if the source isn't a write. |
914 | if (!Src->mayWriteToMemory()) |
915 | return true; |
916 | |
917 | // At least one of the accesses must be strided. |
918 | if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride)) |
919 | return true; |
920 | |
921 | // If dependence information is not available from LoopAccessInfo, |
922 | // conservatively assume the instructions can't be reordered. |
923 | if (!areDependencesValid()) |
924 | return false; |
925 | |
926 | // If we know there is a dependence from source to sink, assume the |
927 | // instructions can't be reordered. Otherwise, reordering is legal. |
928 | return Dependences.find(Src) == Dependences.end() || |
929 | !Dependences.lookup(Src).count(Sink); |
930 | } |
931 | |
932 | /// Collect the dependences from LoopAccessInfo. |
933 | /// |
934 | /// We process the dependences once during the interleaved access analysis to |
935 | /// enable constant-time dependence queries. |
936 | void collectDependences() { |
937 | if (!areDependencesValid()) |
938 | return; |
939 | auto *Deps = LAI->getDepChecker().getDependences(); |
940 | for (auto Dep : *Deps) |
941 | Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI)); |
942 | } |
943 | }; |
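// A sketch of driving the analysis above (illustrative only; the loop
// vectorizers construct this with their own analysis results in hand):
inline void sketchAnalyzeInterleaving(PredicatedScalarEvolution &PSE, Loop *L,
                                      DominatorTree *DT, LoopInfo *LI,
                                      const LoopAccessInfo *LAI) {
  InterleavedAccessInfo IAI(PSE, L, DT, LI, LAI);
  IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  for (auto *Group : IAI.getInterleaveGroups()) {
    // Each collected group can now be queried for its factor, members, gaps
    // and insertion point.
    (void)Group->getFactor();
  }
}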
944 | |
945 | } // llvm namespace |
946 | |
947 | #endif |
1 | //===- llvm/ADT/DenseMap.h - Dense probed hash table ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the DenseMap class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_DENSEMAP_H |
14 | #define LLVM_ADT_DENSEMAP_H |
15 | |
16 | #include "llvm/ADT/DenseMapInfo.h" |
17 | #include "llvm/ADT/EpochTracker.h" |
18 | #include "llvm/Support/AlignOf.h" |
19 | #include "llvm/Support/Compiler.h" |
20 | #include "llvm/Support/MathExtras.h" |
21 | #include "llvm/Support/MemAlloc.h" |
22 | #include "llvm/Support/ReverseIteration.h" |
23 | #include "llvm/Support/type_traits.h" |
24 | #include <algorithm> |
25 | #include <cassert> |
26 | #include <cstddef> |
27 | #include <cstring> |
28 | #include <initializer_list> |
29 | #include <iterator> |
30 | #include <new> |
31 | #include <type_traits> |
32 | #include <utility> |
33 | |
34 | namespace llvm { |
35 | |
36 | namespace detail { |
37 | |
38 | // We extend a pair to allow users to override the bucket type with their own |
39 | // implementation without requiring two members. |
40 | template <typename KeyT, typename ValueT> |
41 | struct DenseMapPair : public std::pair<KeyT, ValueT> { |
42 | using std::pair<KeyT, ValueT>::pair; |
43 | |
44 | KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; } |
45 | const KeyT &getFirst() const { return std::pair<KeyT, ValueT>::first; } |
46 | ValueT &getSecond() { return std::pair<KeyT, ValueT>::second; } |
47 | const ValueT &getSecond() const { return std::pair<KeyT, ValueT>::second; } |
48 | }; |
49 | |
50 | } // end namespace detail |
51 | |
52 | template <typename KeyT, typename ValueT, |
53 | typename KeyInfoT = DenseMapInfo<KeyT>, |
54 | typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>, |
55 | bool IsConst = false> |
56 | class DenseMapIterator; |
57 | |
58 | template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT, |
59 | typename BucketT> |
60 | class DenseMapBase : public DebugEpochBase { |
61 | template <typename T> |
62 | using const_arg_type_t = typename const_pointer_or_const_ref<T>::type; |
63 | |
64 | public: |
65 | using size_type = unsigned; |
66 | using key_type = KeyT; |
67 | using mapped_type = ValueT; |
68 | using value_type = BucketT; |
69 | |
70 | using iterator = DenseMapIterator<KeyT, ValueT, KeyInfoT, BucketT>; |
71 | using const_iterator = |
72 | DenseMapIterator<KeyT, ValueT, KeyInfoT, BucketT, true>; |
73 | |
74 | inline iterator begin() { |
75 | // When the map is empty, avoid the overhead of advancing/retreating past |
76 | // empty buckets. |
77 | if (empty()) |
78 | return end(); |
79 | if (shouldReverseIterate<KeyT>()) |
80 | return makeIterator(getBucketsEnd() - 1, getBuckets(), *this); |
81 | return makeIterator(getBuckets(), getBucketsEnd(), *this); |
82 | } |
83 | inline iterator end() { |
84 | return makeIterator(getBucketsEnd(), getBucketsEnd(), *this, true); |
85 | } |
86 | inline const_iterator begin() const { |
87 | if (empty()) |
88 | return end(); |
89 | if (shouldReverseIterate<KeyT>()) |
90 | return makeConstIterator(getBucketsEnd() - 1, getBuckets(), *this); |
91 | return makeConstIterator(getBuckets(), getBucketsEnd(), *this); |
92 | } |
93 | inline const_iterator end() const { |
94 | return makeConstIterator(getBucketsEnd(), getBucketsEnd(), *this, true); |
95 | } |
96 | |
97 | LLVM_NODISCARD bool empty() const {
98 | return getNumEntries() == 0; |
99 | } |
100 | unsigned size() const { return getNumEntries(); } |
101 | |
102 | /// Grow the densemap so that it can contain at least \p NumEntries items |
103 | /// before resizing again. |
104 | void reserve(size_type NumEntries) { |
105 | auto NumBuckets = getMinBucketToReserveForEntries(NumEntries); |
106 | incrementEpoch(); |
107 | if (NumBuckets > getNumBuckets()) |
108 | grow(NumBuckets); |
109 | } |
110 | |
111 | void clear() { |
112 | incrementEpoch(); |
113 | if (getNumEntries() == 0 && getNumTombstones() == 0) return; |
114 | |
115 | // If the capacity of the array is huge, and the # elements used is small, |
116 | // shrink the array. |
117 | if (getNumEntries() * 4 < getNumBuckets() && getNumBuckets() > 64) { |
118 | shrink_and_clear(); |
119 | return; |
120 | } |
121 | |
122 | const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); |
123 | if (std::is_trivially_destructible<ValueT>::value) { |
124 | // Use a simpler loop when values don't need destruction. |
125 | for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) |
126 | P->getFirst() = EmptyKey; |
127 | } else { |
128 | unsigned NumEntries = getNumEntries(); |
129 | for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) { |
130 | if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey)) { |
131 | if (!KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) { |
132 | P->getSecond().~ValueT(); |
133 | --NumEntries; |
134 | } |
135 | P->getFirst() = EmptyKey; |
136 | } |
137 | } |
138 | assert(NumEntries == 0 && "Node count imbalance!");
139 | } |
140 | setNumEntries(0); |
141 | setNumTombstones(0); |
142 | } |
143 | |
144 | /// Return 1 if the specified key is in the map, 0 otherwise. |
145 | size_type count(const_arg_type_t<KeyT> Val) const { |
146 | const BucketT *TheBucket; |
147 | return LookupBucketFor(Val, TheBucket) ? 1 : 0; |
148 | } |
149 | |
150 | iterator find(const_arg_type_t<KeyT> Val) { |
151 | BucketT *TheBucket; |
152 | if (LookupBucketFor(Val, TheBucket)) |
153 | return makeIterator(TheBucket, |
154 | shouldReverseIterate<KeyT>() ? getBuckets() |
155 | : getBucketsEnd(), |
156 | *this, true); |
157 | return end(); |
158 | } |
159 | const_iterator find(const_arg_type_t<KeyT> Val) const { |
160 | const BucketT *TheBucket; |
161 | if (LookupBucketFor(Val, TheBucket)) |
162 | return makeConstIterator(TheBucket, |
163 | shouldReverseIterate<KeyT>() ? getBuckets() |
164 | : getBucketsEnd(), |
165 | *this, true); |
166 | return end(); |
167 | } |
168 | |
169 | /// Alternate version of find() which allows a different, and possibly |
170 | /// less expensive, key type. |
171 | /// The DenseMapInfo is responsible for supplying methods |
172 | /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key |
173 | /// type used. |
174 | template<class LookupKeyT> |
175 | iterator find_as(const LookupKeyT &Val) { |
176 | BucketT *TheBucket; |
177 | if (LookupBucketFor(Val, TheBucket)) |
178 | return makeIterator(TheBucket, |
179 | shouldReverseIterate<KeyT>() ? getBuckets() |
180 | : getBucketsEnd(), |
181 | *this, true); |
182 | return end(); |
183 | } |
184 | template<class LookupKeyT> |
185 | const_iterator find_as(const LookupKeyT &Val) const { |
186 | const BucketT *TheBucket; |
187 | if (LookupBucketFor(Val, TheBucket)) |
188 | return makeConstIterator(TheBucket, |
189 | shouldReverseIterate<KeyT>() ? getBuckets() |
190 | : getBucketsEnd(), |
191 | *this, true); |
192 | return end(); |
193 | } |
194 | |
195 | /// lookup - Return the entry for the specified key, or a default |
196 | /// constructed value if no such entry exists. |
197 | ValueT lookup(const_arg_type_t<KeyT> Val) const { |
198 | const BucketT *TheBucket; |
199 | if (LookupBucketFor(Val, TheBucket)) |
200 | return TheBucket->getSecond(); |
201 | return ValueT(); |
202 | } |
203 | |
204 | // Inserts key,value pair into the map if the key isn't already in the map. |
205 | // If the key is already in the map, it returns false and doesn't update the |
206 | // value. |
207 | std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) { |
208 | return try_emplace(KV.first, KV.second); |
209 | } |
210 | |
211 | // Inserts key,value pair into the map if the key isn't already in the map. |
212 | // If the key is already in the map, it returns false and doesn't update the |
213 | // value. |
214 | std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) { |
215 | return try_emplace(std::move(KV.first), std::move(KV.second)); |
216 | } |
217 | |
218 | // Inserts key,value pair into the map if the key isn't already in the map. |
219 | // The value is constructed in-place if the key is not in the map, otherwise |
220 | // it is not moved. |
221 | template <typename... Ts> |
222 | std::pair<iterator, bool> try_emplace(KeyT &&Key, Ts &&... Args) { |
223 | BucketT *TheBucket; |
224 | if (LookupBucketFor(Key, TheBucket)) |
225 | return std::make_pair(makeIterator(TheBucket, |
226 | shouldReverseIterate<KeyT>() |
227 | ? getBuckets() |
228 | : getBucketsEnd(), |
229 | *this, true), |
230 | false); // Already in map. |
231 | |
232 | // Otherwise, insert the new element. |
233 | TheBucket = |
234 | InsertIntoBucket(TheBucket, std::move(Key), std::forward<Ts>(Args)...); |
235 | return std::make_pair(makeIterator(TheBucket, |
236 | shouldReverseIterate<KeyT>() |
237 | ? getBuckets() |
238 | : getBucketsEnd(), |
239 | *this, true), |
240 | true); |
241 | } |
242 | |
243 | // Inserts key,value pair into the map if the key isn't already in the map. |
244 | // The value is constructed in-place if the key is not in the map, otherwise |
245 | // it is not moved. |
246 | template <typename... Ts> |
247 | std::pair<iterator, bool> try_emplace(const KeyT &Key, Ts &&... Args) { |
248 | BucketT *TheBucket; |
249 | if (LookupBucketFor(Key, TheBucket)) |
250 | return std::make_pair(makeIterator(TheBucket, |
251 | shouldReverseIterate<KeyT>() |
252 | ? getBuckets() |
253 | : getBucketsEnd(), |
254 | *this, true), |
255 | false); // Already in map. |
256 | |
257 | // Otherwise, insert the new element. |
258 | TheBucket = InsertIntoBucket(TheBucket, Key, std::forward<Ts>(Args)...); |
259 | return std::make_pair(makeIterator(TheBucket, |
260 | shouldReverseIterate<KeyT>() |
261 | ? getBuckets() |
262 | : getBucketsEnd(), |
263 | *this, true), |
264 | true); |
265 | } |
266 | |
267 | /// Alternate version of insert() which allows a different, and possibly |
268 | /// less expensive, key type. |
269 | /// The DenseMapInfo is responsible for supplying methods |
270 | /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key |
271 | /// type used. |
272 | template <typename LookupKeyT> |
273 | std::pair<iterator, bool> insert_as(std::pair<KeyT, ValueT> &&KV, |
274 | const LookupKeyT &Val) { |
275 | BucketT *TheBucket; |
276 | if (LookupBucketFor(Val, TheBucket)) |
277 | return std::make_pair(makeIterator(TheBucket, |
278 | shouldReverseIterate<KeyT>() |
279 | ? getBuckets() |
280 | : getBucketsEnd(), |
281 | *this, true), |
282 | false); // Already in map. |
283 | |
284 | // Otherwise, insert the new element. |
285 | TheBucket = InsertIntoBucketWithLookup(TheBucket, std::move(KV.first), |
286 | std::move(KV.second), Val); |
287 | return std::make_pair(makeIterator(TheBucket, |
288 | shouldReverseIterate<KeyT>() |
289 | ? getBuckets() |
290 | : getBucketsEnd(), |
291 | *this, true), |
292 | true); |
293 | } |
294 | |
295 | /// insert - Range insertion of pairs. |
296 | template<typename InputIt> |
297 | void insert(InputIt I, InputIt E) { |
298 | for (; I != E; ++I) |
299 | insert(*I); |
300 | } |
301 | |
302 | bool erase(const KeyT &Val) { |
303 | BucketT *TheBucket; |
304 | if (!LookupBucketFor(Val, TheBucket)) |
305 | return false; // not in map. |
306 | |
307 | TheBucket->getSecond().~ValueT(); |
308 | TheBucket->getFirst() = getTombstoneKey(); |
309 | decrementNumEntries(); |
310 | incrementNumTombstones(); |
311 | return true; |
312 | } |
313 | void erase(iterator I) { |
314 | BucketT *TheBucket = &*I; |
315 | TheBucket->getSecond().~ValueT(); |
316 | TheBucket->getFirst() = getTombstoneKey(); |
317 | decrementNumEntries(); |
318 | incrementNumTombstones(); |
319 | } |
320 | |
321 | value_type& FindAndConstruct(const KeyT &Key) { |
322 | BucketT *TheBucket; |
323 | if (LookupBucketFor(Key, TheBucket)) |
324 | return *TheBucket; |
325 | |
326 | return *InsertIntoBucket(TheBucket, Key); |
327 | } |
328 | |
329 | ValueT &operator[](const KeyT &Key) { |
330 | return FindAndConstruct(Key).second; |
331 | } |
332 | |
333 | value_type& FindAndConstruct(KeyT &&Key) { |
334 | BucketT *TheBucket; |
335 | if (LookupBucketFor(Key, TheBucket)) |
336 | return *TheBucket; |
337 | |
338 | return *InsertIntoBucket(TheBucket, std::move(Key)); |
339 | } |
340 | |
341 | ValueT &operator[](KeyT &&Key) { |
342 | return FindAndConstruct(std::move(Key)).second; |
343 | } |
344 | |
345 | /// isPointerIntoBucketsArray - Return true if the specified pointer points |
346 | /// somewhere into the DenseMap's array of buckets (i.e. either to a key or |
347 | /// value in the DenseMap). |
348 | bool isPointerIntoBucketsArray(const void *Ptr) const { |
349 | return Ptr >= getBuckets() && Ptr < getBucketsEnd(); |
350 | } |
351 | |
352 | /// getPointerIntoBucketsArray() - Return an opaque pointer into the buckets |
353 | /// array. In conjunction with the previous method, this can be used to |
354 | /// determine whether an insertion caused the DenseMap to reallocate. |
355 | const void *getPointerIntoBucketsArray() const { return getBuckets(); } |
356 | |
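// A usage sketch of the query/update API above (keys and values are
// illustrative; DenseMap, defined further below, derives from this class):
//
//   DenseMap<int, int> M;
//   M.insert({1, 10});      // {iterator, true}  - newly inserted
//   M.insert({1, 99});      // {iterator, false} - value stays 10
//   M.try_emplace(2, 20);   // constructs the value in place
//   M[3] = 30;              // FindAndConstruct, then assignment
//   M.lookup(4);            // 0: default-constructed ValueT, nothing inserted
//   M.count(4);             // 0, and M.find(4) == M.end()
//   M.erase(1);             // removes the entry, leaving a tombstone bucket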
357 | protected: |
358 | DenseMapBase() = default; |
359 | |
360 | void destroyAll() { |
361 | if (getNumBuckets() == 0) // Nothing to do. |
362 | return; |
363 | |
364 | const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); |
365 | for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) { |
366 | if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey) && |
367 | !KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) |
368 | P->getSecond().~ValueT(); |
369 | P->getFirst().~KeyT(); |
370 | } |
371 | } |
372 | |
373 | void initEmpty() { |
374 | setNumEntries(0); |
375 | setNumTombstones(0); |
376 | |
377 | assert((getNumBuckets() & (getNumBuckets()-1)) == 0 &&
378 | "# initial buckets must be a power of two!");
379 | const KeyT EmptyKey = getEmptyKey(); |
380 | for (BucketT *B = getBuckets(), *E = getBucketsEnd(); B != E; ++B) |
381 | ::new (&B->getFirst()) KeyT(EmptyKey); |
382 | } |
383 | |
384 | /// Returns the number of buckets to allocate to ensure that the DenseMap can |
385 | /// accommodate \p NumEntries without need to grow(). |
386 | unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { |
387 | // Ensure that "NumEntries * 4 < NumBuckets * 3" |
388 | if (NumEntries == 0) |
389 | return 0; |
390 | // +1 is required because of the strict inequality: for NumEntries == 48 we need
391 | // more than 64 buckets (48 * 4 == 64 * 3), and NextPowerOf2(64 + 1) yields 128.
392 | return NextPowerOf2(NumEntries * 4 / 3 + 1); |
393 | } |
394 | |
395 | void moveFromOldBuckets(BucketT *OldBucketsBegin, BucketT *OldBucketsEnd) { |
396 | initEmpty(); |
397 | |
398 | // Insert all the old elements. |
399 | const KeyT EmptyKey = getEmptyKey(); |
400 | const KeyT TombstoneKey = getTombstoneKey(); |
401 | for (BucketT *B = OldBucketsBegin, *E = OldBucketsEnd; B != E; ++B) { |
402 | if (!KeyInfoT::isEqual(B->getFirst(), EmptyKey) && |
403 | !KeyInfoT::isEqual(B->getFirst(), TombstoneKey)) { |
404 | // Insert the key/value into the new table. |
405 | BucketT *DestBucket; |
406 | bool FoundVal = LookupBucketFor(B->getFirst(), DestBucket); |
407 | (void)FoundVal; // silence warning. |
408 | assert(!FoundVal && "Key already in new map?");
409 | DestBucket->getFirst() = std::move(B->getFirst()); |
410 | ::new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond())); |
411 | incrementNumEntries(); |
412 | |
413 | // Free the value. |
414 | B->getSecond().~ValueT(); |
415 | } |
416 | B->getFirst().~KeyT(); |
417 | } |
418 | } |
419 | |
420 | template <typename OtherBaseT> |
421 | void copyFrom( |
422 | const DenseMapBase<OtherBaseT, KeyT, ValueT, KeyInfoT, BucketT> &other) { |
423 | assert(&other != this);
424 | assert(getNumBuckets() == other.getNumBuckets());
425 | |
426 | setNumEntries(other.getNumEntries()); |
427 | setNumTombstones(other.getNumTombstones()); |
428 | |
429 | if (std::is_trivially_copyable<KeyT>::value && |
430 | std::is_trivially_copyable<ValueT>::value) |
431 | memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(), |
432 | getNumBuckets() * sizeof(BucketT)); |
433 | else |
434 | for (size_t i = 0; i < getNumBuckets(); ++i) { |
435 | ::new (&getBuckets()[i].getFirst()) |
436 | KeyT(other.getBuckets()[i].getFirst()); |
437 | if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) && |
438 | !KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey())) |
439 | ::new (&getBuckets()[i].getSecond()) |
440 | ValueT(other.getBuckets()[i].getSecond()); |
441 | } |
442 | } |
443 | |
444 | static unsigned getHashValue(const KeyT &Val) { |
445 | return KeyInfoT::getHashValue(Val); |
446 | } |
447 | |
448 | template<typename LookupKeyT> |
449 | static unsigned getHashValue(const LookupKeyT &Val) { |
450 | return KeyInfoT::getHashValue(Val); |
451 | } |
452 | |
453 | static const KeyT getEmptyKey() { |
454 | static_assert(std::is_base_of<DenseMapBase, DerivedT>::value, |
455 | "Must pass the derived type to this template!"); |
456 | return KeyInfoT::getEmptyKey(); |
457 | } |
458 | |
459 | static const KeyT getTombstoneKey() { |
460 | return KeyInfoT::getTombstoneKey(); |
461 | } |
462 | |
463 | private: |
464 | iterator makeIterator(BucketT *P, BucketT *E, |
465 | DebugEpochBase &Epoch, |
466 | bool NoAdvance=false) { |
467 | if (shouldReverseIterate<KeyT>()) { |
468 | BucketT *B = P == getBucketsEnd() ? getBuckets() : P + 1; |
469 | return iterator(B, E, Epoch, NoAdvance); |
470 | } |
471 | return iterator(P, E, Epoch, NoAdvance); |
472 | } |
473 | |
474 | const_iterator makeConstIterator(const BucketT *P, const BucketT *E, |
475 | const DebugEpochBase &Epoch, |
476 | const bool NoAdvance=false) const { |
477 | if (shouldReverseIterate<KeyT>()) { |
478 | const BucketT *B = P == getBucketsEnd() ? getBuckets() : P + 1; |
479 | return const_iterator(B, E, Epoch, NoAdvance); |
480 | } |
481 | return const_iterator(P, E, Epoch, NoAdvance); |
482 | } |
483 | |
484 | unsigned getNumEntries() const { |
485 | return static_cast<const DerivedT *>(this)->getNumEntries(); |
486 | } |
487 | |
488 | void setNumEntries(unsigned Num) { |
489 | static_cast<DerivedT *>(this)->setNumEntries(Num); |
490 | } |
491 | |
492 | void incrementNumEntries() { |
493 | setNumEntries(getNumEntries() + 1); |
494 | } |
495 | |
496 | void decrementNumEntries() { |
497 | setNumEntries(getNumEntries() - 1); |
498 | } |
499 | |
500 | unsigned getNumTombstones() const { |
501 | return static_cast<const DerivedT *>(this)->getNumTombstones(); |
502 | } |
503 | |
504 | void setNumTombstones(unsigned Num) { |
505 | static_cast<DerivedT *>(this)->setNumTombstones(Num); |
506 | } |
507 | |
508 | void incrementNumTombstones() { |
509 | setNumTombstones(getNumTombstones() + 1); |
510 | } |
511 | |
512 | void decrementNumTombstones() { |
513 | setNumTombstones(getNumTombstones() - 1); |
514 | } |
515 | |
516 | const BucketT *getBuckets() const { |
517 | return static_cast<const DerivedT *>(this)->getBuckets(); |
518 | } |
519 | |
520 | BucketT *getBuckets() { |
521 | return static_cast<DerivedT *>(this)->getBuckets(); |
522 | } |
523 | |
524 | unsigned getNumBuckets() const { |
525 | return static_cast<const DerivedT *>(this)->getNumBuckets(); |
526 | } |
527 | |
528 | BucketT *getBucketsEnd() { |
529 | return getBuckets() + getNumBuckets(); |
530 | } |
531 | |
532 | const BucketT *getBucketsEnd() const { |
533 | return getBuckets() + getNumBuckets(); |
534 | } |
535 | |
536 | void grow(unsigned AtLeast) { |
537 | static_cast<DerivedT *>(this)->grow(AtLeast); |
538 | } |
539 | |
540 | void shrink_and_clear() { |
541 | static_cast<DerivedT *>(this)->shrink_and_clear(); |
542 | } |
543 | |
544 | template <typename KeyArg, typename... ValueArgs> |
545 | BucketT *InsertIntoBucket(BucketT *TheBucket, KeyArg &&Key, |
546 | ValueArgs &&... Values) { |
547 | TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket); |
548 | |
549 | TheBucket->getFirst() = std::forward<KeyArg>(Key); |
550 | ::new (&TheBucket->getSecond()) ValueT(std::forward<ValueArgs>(Values)...); |
551 | return TheBucket; |
552 | } |
553 | |
554 | template <typename LookupKeyT> |
555 | BucketT *InsertIntoBucketWithLookup(BucketT *TheBucket, KeyT &&Key, |
556 | ValueT &&Value, LookupKeyT &Lookup) { |
557 | TheBucket = InsertIntoBucketImpl(Key, Lookup, TheBucket); |
558 | |
559 | TheBucket->getFirst() = std::move(Key); |
560 | ::new (&TheBucket->getSecond()) ValueT(std::move(Value)); |
561 | return TheBucket; |
562 | } |
563 | |
564 | template <typename LookupKeyT> |
565 | BucketT *InsertIntoBucketImpl(const KeyT &Key, const LookupKeyT &Lookup, |
566 | BucketT *TheBucket) { |
567 | incrementEpoch(); |
568 | |
569 | // If the load of the hash table is more than 3/4, or if fewer than 1/8 of |
570 | // the buckets are empty (meaning that many are filled with tombstones), |
571 | // grow the table. |
572 | // |
573 | // The latter case is tricky. For example, if we had one empty bucket with
574 | // tons of tombstones, failing lookups (e.g. for insertion) would have to
575 | // probe almost the entire table until it found the empty bucket. If the
576 | // table were completely filled with tombstones, no lookup would ever succeed,
577 | // causing infinite loops in lookup. |
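// Worked example of the first condition: with 64 buckets the table grows to
// 128 when the 48th entry is about to go in (48 * 4 == 192 >= 64 * 3), keeping
// the load factor below 3/4. The second condition instead calls grow() at the
// current size, rehashing into a fresh table of the same size, which clears
// out the accumulated tombstones.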
578 | unsigned NewNumEntries = getNumEntries() + 1; |
579 | unsigned NumBuckets = getNumBuckets(); |
580 | if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
581 | this->grow(NumBuckets * 2); |
582 | LookupBucketFor(Lookup, TheBucket); |
583 | NumBuckets = getNumBuckets(); |
584 | } else if (LLVM_UNLIKELY(NumBuckets-(NewNumEntries+getNumTombstones()) <=
585 | NumBuckets/8)) {
586 | this->grow(NumBuckets); |
587 | LookupBucketFor(Lookup, TheBucket); |
588 | } |
589 | assert(TheBucket);
590 | |
591 | // Only update the state after we've grown our bucket space appropriately |
592 | // so that when growing buckets we have self-consistent entry count. |
593 | incrementNumEntries(); |
594 | |
595 | // If we are writing over a tombstone, remember this. |
596 | const KeyT EmptyKey = getEmptyKey(); |
597 | if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey)) |
598 | decrementNumTombstones(); |
599 | |
600 | return TheBucket; |
601 | } |
602 | |
603 | /// LookupBucketFor - Lookup the appropriate bucket for Val, returning it in |
604 | /// FoundBucket. If the bucket contains the key and a value, this returns |
605 | /// true, otherwise it returns a bucket with an empty marker or tombstone and |
606 | /// returns false. |
607 | template<typename LookupKeyT> |
608 | bool LookupBucketFor(const LookupKeyT &Val, |
609 | const BucketT *&FoundBucket) const { |
610 | const BucketT *BucketsPtr = getBuckets(); |
611 | const unsigned NumBuckets = getNumBuckets(); |
612 | |
613 | if (NumBuckets == 0) { |
614 | FoundBucket = nullptr; |
615 | return false; |
616 | } |
617 | |
618 | // FoundTombstone - Keep track of whether we find a tombstone while probing. |
619 | const BucketT *FoundTombstone = nullptr; |
620 | const KeyT EmptyKey = getEmptyKey(); |
621 | const KeyT TombstoneKey = getTombstoneKey(); |
622 | assert(!KeyInfoT::isEqual(Val, EmptyKey) &&
623 | !KeyInfoT::isEqual(Val, TombstoneKey) &&
624 | "Empty/Tombstone value shouldn't be inserted into map!");
625 | |
626 | unsigned BucketNo = getHashValue(Val) & (NumBuckets-1); |
627 | unsigned ProbeAmt = 1; |
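// Probing note: because ProbeAmt grows by one each iteration, the walk below
// visits H, H+1, H+3, H+6, H+10, ... (all mod NumBuckets), i.e. the i-th probe
// lands at H + i*(i+1)/2. For a power-of-two table size this triangular-number
// sequence reaches every bucket, so the loop terminates as long as at least
// one bucket is empty or holds the key.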
628 | while (true) { |
629 | const BucketT *ThisBucket = BucketsPtr + BucketNo; |
630 | // Found Val's bucket? If so, return it. |
631 | if (LLVM_LIKELY(KeyInfoT::isEqual(Val, ThisBucket->getFirst()))) {
632 | FoundBucket = ThisBucket; |
633 | return true; |
634 | } |
635 | |
636 | // If we found an empty bucket, the key doesn't exist in the map. Report the
637 | // bucket an insertion should use and return false.
638 | if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) {
639 | // If we've already seen a tombstone while probing, fill it in instead |
640 | // of the empty bucket we eventually probed to. |
641 | FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket; |
642 | return false; |
643 | } |
644 | |
645 | // If this is a tombstone, remember it. If Val ends up not in the map, we |
646 | // prefer to return it rather than something that would require more probing.
647 | if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) && |
648 | !FoundTombstone) |
649 | FoundTombstone = ThisBucket; // Remember the first tombstone found. |
650 | |
651 | // Otherwise, it's a hash collision or a tombstone, continue quadratic |
652 | // probing. |
653 | BucketNo += ProbeAmt++; |
654 | BucketNo &= (NumBuckets-1); |
655 | } |
656 | } |
657 | |
658 | template <typename LookupKeyT> |
659 | bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) { |
660 | const BucketT *ConstFoundBucket; |
661 | bool Result = const_cast<const DenseMapBase *>(this) |
662 | ->LookupBucketFor(Val, ConstFoundBucket); |
663 | FoundBucket = const_cast<BucketT *>(ConstFoundBucket); |
664 | return Result; |
665 | } |
666 | |
667 | public: |
668 | /// Return the approximate size (in bytes) of the actual map. |
669 | /// This is just the raw memory used by DenseMap. |
670 | /// If entries are pointers to objects, the size of the referenced objects |
671 | /// are not included. |
672 | size_t getMemorySize() const { |
673 | return getNumBuckets() * sizeof(BucketT); |
674 | } |
675 | }; |
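// --- Editorial illustration (not part of this header) ----------------------
// A minimal, self-contained sketch of two policies implemented above: the
// grow-on-insert thresholds from the bucket-insertion helper, and the
// quadratic probe step used by LookupBucketFor. The enum and function names
// below are hypothetical and exist only for illustration; they mirror the
// arithmetic, nothing more.
enum class DenseMapGrowSketch { None, Double, RehashInPlace };

inline DenseMapGrowSketch
denseMapGrowDecisionSketch(unsigned NewNumEntries, unsigned NumBuckets,
                           unsigned NumTombstones) {
  // Double once the table would exceed 3/4 load.
  if (NewNumEntries * 4 >= NumBuckets * 3)
    return DenseMapGrowSketch::Double;
  // Rehash at the same size when fewer than 1/8 of the buckets are empty,
  // which reclaims slots held by tombstones.
  if (NumBuckets - (NewNumEntries + NumTombstones) <= NumBuckets / 8)
    return DenseMapGrowSketch::RehashInPlace;
  return DenseMapGrowSketch::None;
}

// Probe sequence: start at Hash & (NumBuckets - 1); each collision advances
// by an increasing step (1, 2, 3, ...) and wraps with a mask, which is why
// the bucket count must stay a power of two.
inline unsigned denseMapNextProbeSketch(unsigned BucketNo, unsigned &ProbeAmt,
                                        unsigned NumBuckets) {
  BucketNo += ProbeAmt++;
  return BucketNo & (NumBuckets - 1);
}
// ----------------------------------------------------------------------------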
676 | |
677 | /// Equality comparison for DenseMap. |
678 | /// |
679 | /// Iterates over elements of LHS confirming that each (key, value) pair in LHS |
680 | /// is also in RHS, and that no additional pairs are in RHS. |
681 | /// Equivalent to N calls to RHS.find and N value comparisons. Amortized |
682 | /// complexity is linear, worst case is O(N^2) (if every hash collides). |
683 | template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT, |
684 | typename BucketT> |
685 | bool operator==( |
686 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS, |
687 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) { |
688 | if (LHS.size() != RHS.size()) |
689 | return false; |
690 | |
691 | for (auto &KV : LHS) { |
692 | auto I = RHS.find(KV.first); |
693 | if (I == RHS.end() || I->second != KV.second) |
694 | return false; |
695 | } |
696 | |
697 | return true; |
698 | } |
699 | |
700 | /// Inequality comparison for DenseMap. |
701 | /// |
702 | /// Equivalent to !(LHS == RHS). See operator== for performance notes. |
703 | template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT, |
704 | typename BucketT> |
705 | bool operator!=( |
706 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS, |
707 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) { |
708 | return !(LHS == RHS); |
709 | } |
710 | |
711 | template <typename KeyT, typename ValueT, |
712 | typename KeyInfoT = DenseMapInfo<KeyT>, |
713 | typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>> |
714 | class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>, |
715 | KeyT, ValueT, KeyInfoT, BucketT> { |
716 | friend class DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
717 | |
718 | // Lift some types from the dependent base class into this class for |
719 | // simplicity of referring to them. |
720 | using BaseT = DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
721 | |
722 | BucketT *Buckets; |
723 | unsigned NumEntries; |
724 | unsigned NumTombstones; |
725 | unsigned NumBuckets; |
726 | |
727 | public: |
728 | /// Create a DenseMap with an optional \p InitialReserve that guarantees that |
729 | /// this number of elements can be inserted in the map without calling grow(). |
730 | explicit DenseMap(unsigned InitialReserve = 0) { init(InitialReserve); } |
731 | |
732 | DenseMap(const DenseMap &other) : BaseT() { |
733 | init(0); |
734 | copyFrom(other); |
735 | } |
736 | |
737 | DenseMap(DenseMap &&other) : BaseT() { |
738 | init(0); |
739 | swap(other); |
740 | } |
741 | |
742 | template<typename InputIt> |
743 | DenseMap(const InputIt &I, const InputIt &E) { |
744 | init(std::distance(I, E)); |
745 | this->insert(I, E); |
746 | } |
747 | |
748 | DenseMap(std::initializer_list<typename BaseT::value_type> Vals) { |
749 | init(Vals.size()); |
750 | this->insert(Vals.begin(), Vals.end()); |
751 | } |
752 | |
753 | ~DenseMap() { |
754 | this->destroyAll(); |
755 | deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); |
756 | } |
757 | |
758 | void swap(DenseMap& RHS) { |
759 | this->incrementEpoch(); |
760 | RHS.incrementEpoch(); |
761 | std::swap(Buckets, RHS.Buckets); |
762 | std::swap(NumEntries, RHS.NumEntries); |
763 | std::swap(NumTombstones, RHS.NumTombstones); |
764 | std::swap(NumBuckets, RHS.NumBuckets); |
765 | } |
766 | |
767 | DenseMap& operator=(const DenseMap& other) { |
768 | if (&other != this) |
769 | copyFrom(other); |
770 | return *this; |
771 | } |
772 | |
773 | DenseMap& operator=(DenseMap &&other) { |
774 | this->destroyAll(); |
775 | deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); |
776 | init(0); |
777 | swap(other); |
778 | return *this; |
779 | } |
780 | |
781 | void copyFrom(const DenseMap& other) { |
782 | this->destroyAll(); |
783 | deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); |
784 | if (allocateBuckets(other.NumBuckets)) { |
785 | this->BaseT::copyFrom(other); |
786 | } else { |
787 | NumEntries = 0; |
788 | NumTombstones = 0; |
789 | } |
790 | } |
791 | |
792 | void init(unsigned InitNumEntries) { |
793 | auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries); |
794 | if (allocateBuckets(InitBuckets)) { |
795 | this->BaseT::initEmpty(); |
796 | } else { |
797 | NumEntries = 0; |
798 | NumTombstones = 0; |
799 | } |
800 | } |
801 | |
802 | void grow(unsigned AtLeast) { |
803 | unsigned OldNumBuckets = NumBuckets; |
804 | BucketT *OldBuckets = Buckets; |
805 | |
806 | allocateBuckets(std::max<unsigned>(64, static_cast<unsigned>(NextPowerOf2(AtLeast-1)))); |
807 | assert(Buckets); |
808 | if (!OldBuckets) { |
809 | this->BaseT::initEmpty(); |
810 | return; |
811 | } |
812 | |
813 | this->moveFromOldBuckets(OldBuckets, OldBuckets+OldNumBuckets); |
814 | |
815 | // Free the old table. |
816 | deallocate_buffer(OldBuckets, sizeof(BucketT) * OldNumBuckets, |
817 | alignof(BucketT)); |
818 | } |
819 | |
820 | void shrink_and_clear() { |
821 | unsigned OldNumBuckets = NumBuckets; |
822 | unsigned OldNumEntries = NumEntries; |
823 | this->destroyAll(); |
824 | |
825 | // Reduce the number of buckets. |
826 | unsigned NewNumBuckets = 0; |
827 | if (OldNumEntries) |
828 | NewNumBuckets = std::max(64, 1 << (Log2_32_Ceil(OldNumEntries) + 1)); |
829 | if (NewNumBuckets == NumBuckets) { |
830 | this->BaseT::initEmpty(); |
831 | return; |
832 | } |
833 | |
834 | deallocate_buffer(Buckets, sizeof(BucketT) * OldNumBuckets, |
835 | alignof(BucketT)); |
836 | init(NewNumBuckets); |
837 | } |
838 | |
839 | private: |
840 | unsigned getNumEntries() const { |
841 | return NumEntries; |
842 | } |
843 | |
844 | void setNumEntries(unsigned Num) { |
845 | NumEntries = Num; |
846 | } |
847 | |
848 | unsigned getNumTombstones() const { |
849 | return NumTombstones; |
850 | } |
851 | |
852 | void setNumTombstones(unsigned Num) { |
853 | NumTombstones = Num; |
854 | } |
855 | |
856 | BucketT *getBuckets() const { |
857 | return Buckets; |
858 | } |
859 | |
860 | unsigned getNumBuckets() const { |
861 | return NumBuckets; |
862 | } |
863 | |
864 | bool allocateBuckets(unsigned Num) { |
865 | NumBuckets = Num; |
866 | if (NumBuckets == 0) { |
867 | Buckets = nullptr; |
868 | return false; |
869 | } |
870 | |
871 | Buckets = static_cast<BucketT *>( |
872 | allocate_buffer(sizeof(BucketT) * NumBuckets, alignof(BucketT))); |
873 | return true; |
874 | } |
875 | }; |
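// --- Editorial illustration (not part of this header) ----------------------
// A small usage sketch for the DenseMap defined above: insert/find/lookup,
// the equality operator, and getMemorySize. The function name is
// hypothetical; the member functions it calls are the ones declared in
// DenseMapBase and DenseMap.
inline bool denseMapUsageSketch() {
  DenseMap<int, int> Squares;
  for (int I = 1; I <= 4; ++I)
    Squares.insert({I, I * I});          // insert returns {iterator, bool}

  auto It = Squares.find(3);
  bool Found = It != Squares.end() && It->second == 9;

  // lookup() returns a value-initialized ValueT (0 here) for missing keys.
  bool MissingIsZero = Squares.lookup(42) == 0;

  DenseMap<int, int> Copy(Squares);      // copy constructor uses copyFrom()
  bool Equal = (Copy == Squares);        // element-wise comparison from above

  // Raw bucket storage only; objects pointed to by values are not counted.
  size_t Bytes = Squares.getMemorySize();

  return Found && MissingIsZero && Equal && Bytes >= Squares.size();
}
// ----------------------------------------------------------------------------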
876 | |
877 | template <typename KeyT, typename ValueT, unsigned InlineBuckets = 4, |
878 | typename KeyInfoT = DenseMapInfo<KeyT>, |
879 | typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>> |
880 | class SmallDenseMap |
881 | : public DenseMapBase< |
882 | SmallDenseMap<KeyT, ValueT, InlineBuckets, KeyInfoT, BucketT>, KeyT, |
883 | ValueT, KeyInfoT, BucketT> { |
884 | friend class DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
885 | |
886 | // Lift some types from the dependent base class into this class for |
887 | // simplicity of referring to them. |
888 | using BaseT = DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
889 | |
890 | static_assert(isPowerOf2_64(InlineBuckets), |
891 | "InlineBuckets must be a power of 2."); |
892 | |
893 | unsigned Small : 1; |
894 | unsigned NumEntries : 31; |
895 | unsigned NumTombstones; |
896 | |
897 | struct LargeRep { |
898 | BucketT *Buckets; |
899 | unsigned NumBuckets; |
900 | }; |
901 | |
902 | /// A "union" of an inline bucket array and the struct representing |
903 | /// a large bucket. This union will be discriminated by the 'Small' bit. |
904 | AlignedCharArrayUnion<BucketT[InlineBuckets], LargeRep> storage; |
905 | |
906 | public: |
907 | explicit SmallDenseMap(unsigned NumInitBuckets = 0) { |
908 | init(NumInitBuckets); |
909 | } |
910 | |
911 | SmallDenseMap(const SmallDenseMap &other) : BaseT() { |
912 | init(0); |
913 | copyFrom(other); |
914 | } |
915 | |
916 | SmallDenseMap(SmallDenseMap &&other) : BaseT() { |
917 | init(0); |
918 | swap(other); |
919 | } |
920 | |
921 | template<typename InputIt> |
922 | SmallDenseMap(const InputIt &I, const InputIt &E) { |
923 | init(NextPowerOf2(std::distance(I, E))); |
924 | this->insert(I, E); |
925 | } |
926 | |
927 | SmallDenseMap(std::initializer_list<typename BaseT::value_type> Vals) |
928 | : SmallDenseMap(Vals.begin(), Vals.end()) {} |
929 | |
930 | ~SmallDenseMap() { |
931 | this->destroyAll(); |
932 | deallocateBuckets(); |
933 | } |
934 | |
935 | void swap(SmallDenseMap& RHS) { |
936 | unsigned TmpNumEntries = RHS.NumEntries; |
937 | RHS.NumEntries = NumEntries; |
938 | NumEntries = TmpNumEntries; |
939 | std::swap(NumTombstones, RHS.NumTombstones); |
940 | |
941 | const KeyT EmptyKey = this->getEmptyKey(); |
942 | const KeyT TombstoneKey = this->getTombstoneKey(); |
943 | if (Small && RHS.Small) { |
944 | // If we're swapping inline bucket arrays, we have to cope with some of |
945 | // the tricky bits of DenseMap's storage system: the buckets are not |
946 | // fully initialized. Thus we swap every key, but we may have |
947 | // a one-directional move of the value. |
948 | for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { |
949 | BucketT *LHSB = &getInlineBuckets()[i], |
950 | *RHSB = &RHS.getInlineBuckets()[i]; |
951 | bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) && |
952 | !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey)); |
953 | bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) && |
954 | !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey)); |
955 | if (hasLHSValue && hasRHSValue) { |
956 | // Swap together if we can... |
957 | std::swap(*LHSB, *RHSB); |
958 | continue; |
959 | } |
960 | // Swap separately and handle any asymmetry. |
961 | std::swap(LHSB->getFirst(), RHSB->getFirst()); |
962 | if (hasLHSValue) { |
963 | ::new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond())); |
964 | LHSB->getSecond().~ValueT(); |
965 | } else if (hasRHSValue) { |
966 | ::new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond())); |
967 | RHSB->getSecond().~ValueT(); |
968 | } |
969 | } |
970 | return; |
971 | } |
972 | if (!Small && !RHS.Small) { |
973 | std::swap(getLargeRep()->Buckets, RHS.getLargeRep()->Buckets); |
974 | std::swap(getLargeRep()->NumBuckets, RHS.getLargeRep()->NumBuckets); |
975 | return; |
976 | } |
977 | |
978 | SmallDenseMap &SmallSide = Small ? *this : RHS; |
979 | SmallDenseMap &LargeSide = Small ? RHS : *this; |
980 | |
981 | // First stash the large side's rep and move the small side across. |
982 | LargeRep TmpRep = std::move(*LargeSide.getLargeRep()); |
983 | LargeSide.getLargeRep()->~LargeRep(); |
984 | LargeSide.Small = true; |
985 | // This is similar to the standard move-from-old-buckets, but the bucket |
986 | // count hasn't actually rotated in this case. So we have to carefully |
987 | // move construct the keys and values into their new locations, but there |
988 | // is no need to re-hash things. |
989 | for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { |
990 | BucketT *NewB = &LargeSide.getInlineBuckets()[i], |
991 | *OldB = &SmallSide.getInlineBuckets()[i]; |
992 | ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst())); |
993 | OldB->getFirst().~KeyT(); |
994 | if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) && |
995 | !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) { |
996 | ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond())); |
997 | OldB->getSecond().~ValueT(); |
998 | } |
999 | } |
1000 | |
1001 | // The hard part of moving the small buckets across is done, just move |
1002 | // the TmpRep into its new home. |
1003 | SmallSide.Small = false; |
1004 | new (SmallSide.getLargeRep()) LargeRep(std::move(TmpRep)); |
1005 | } |
1006 | |
1007 | SmallDenseMap& operator=(const SmallDenseMap& other) { |
1008 | if (&other != this) |
1009 | copyFrom(other); |
1010 | return *this; |
1011 | } |
1012 | |
1013 | SmallDenseMap& operator=(SmallDenseMap &&other) { |
1014 | this->destroyAll(); |
1015 | deallocateBuckets(); |
1016 | init(0); |
1017 | swap(other); |
1018 | return *this; |
1019 | } |
1020 | |
1021 | void copyFrom(const SmallDenseMap& other) { |
1022 | this->destroyAll(); |
1023 | deallocateBuckets(); |
1024 | Small = true; |
1025 | if (other.getNumBuckets() > InlineBuckets) { |
1026 | Small = false; |
1027 | new (getLargeRep()) LargeRep(allocateBuckets(other.getNumBuckets())); |
1028 | } |
1029 | this->BaseT::copyFrom(other); |
1030 | } |
1031 | |
1032 | void init(unsigned InitBuckets) { |
1033 | Small = true; |
1034 | if (InitBuckets > InlineBuckets) { |
1035 | Small = false; |
1036 | new (getLargeRep()) LargeRep(allocateBuckets(InitBuckets)); |
1037 | } |
1038 | this->BaseT::initEmpty(); |
1039 | } |
1040 | |
1041 | void grow(unsigned AtLeast) { |
1042 | if (AtLeast > InlineBuckets) |
1043 | AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast-1)); |
1044 | |
1045 | if (Small) { |
1046 | // First move the inline buckets into a temporary storage. |
1047 | AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage; |
1048 | BucketT *TmpBegin = reinterpret_cast<BucketT *>(&TmpStorage); |
1049 | BucketT *TmpEnd = TmpBegin; |
1050 | |
1051 | // Loop over the buckets, moving non-empty, non-tombstones into the |
1052 | // temporary storage. Have the loop move the TmpEnd forward as it goes. |
1053 | const KeyT EmptyKey = this->getEmptyKey(); |
1054 | const KeyT TombstoneKey = this->getTombstoneKey(); |
1055 | for (BucketT *P = getBuckets(), *E = P + InlineBuckets; P != E; ++P) { |
1056 | if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey) && |
1057 | !KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) { |
1058 |         assert(size_t(TmpEnd - TmpBegin) < InlineBuckets && |
1059 |                "Too many inline buckets!"); |
1060 | ::new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst())); |
1061 | ::new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond())); |
1062 | ++TmpEnd; |
1063 | P->getSecond().~ValueT(); |
1064 | } |
1065 | P->getFirst().~KeyT(); |
1066 | } |
1067 | |
1068 | // AtLeast == InlineBuckets can happen if there are many tombstones, |
1069 | // and grow() is used to remove them. Usually we switch to the |
1070 | // large rep here. |
1071 | if (AtLeast > InlineBuckets) { |
1072 | Small = false; |
1073 | new (getLargeRep()) LargeRep(allocateBuckets(AtLeast)); |
1074 | } |
1075 | this->moveFromOldBuckets(TmpBegin, TmpEnd); |
1076 | return; |
1077 | } |
1078 | |
1079 | LargeRep OldRep = std::move(*getLargeRep()); |
1080 | getLargeRep()->~LargeRep(); |
1081 | if (AtLeast <= InlineBuckets) { |
1082 | Small = true; |
1083 | } else { |
1084 | new (getLargeRep()) LargeRep(allocateBuckets(AtLeast)); |
1085 | } |
1086 | |
1087 | this->moveFromOldBuckets(OldRep.Buckets, OldRep.Buckets+OldRep.NumBuckets); |
1088 | |
1089 | // Free the old table. |
1090 | deallocate_buffer(OldRep.Buckets, sizeof(BucketT) * OldRep.NumBuckets, |
1091 | alignof(BucketT)); |
1092 | } |
1093 | |
1094 | void shrink_and_clear() { |
1095 | unsigned OldSize = this->size(); |
1096 | this->destroyAll(); |
1097 | |
1098 | // Reduce the number of buckets. |
1099 | unsigned NewNumBuckets = 0; |
1100 | if (OldSize) { |
1101 | NewNumBuckets = 1 << (Log2_32_Ceil(OldSize) + 1); |
1102 | if (NewNumBuckets > InlineBuckets && NewNumBuckets < 64u) |
1103 | NewNumBuckets = 64; |
1104 | } |
1105 | if ((Small && NewNumBuckets <= InlineBuckets) || |
1106 | (!Small && NewNumBuckets == getLargeRep()->NumBuckets)) { |
1107 | this->BaseT::initEmpty(); |
1108 | return; |
1109 | } |
1110 | |
1111 | deallocateBuckets(); |
1112 | init(NewNumBuckets); |
1113 | } |
1114 | |
1115 | private: |
1116 | unsigned getNumEntries() const { |
1117 | return NumEntries; |
1118 | } |
1119 | |
1120 | void setNumEntries(unsigned Num) { |
1121 | // NumEntries is hardcoded to be 31 bits wide. |
1122 | assert(Num < (1U << 31) && "Cannot support more than 1<<31 entries"); |
1123 | NumEntries = Num; |
1124 | } |
1125 | |
1126 | unsigned getNumTombstones() const { |
1127 | return NumTombstones; |
1128 | } |
1129 | |
1130 | void setNumTombstones(unsigned Num) { |
1131 | NumTombstones = Num; |
1132 | } |
1133 | |
1134 | const BucketT *getInlineBuckets() const { |
1135 | assert(Small); |
1136 | // Note that this cast does not violate aliasing rules as we assert that |
1137 | // the memory's dynamic type is the small, inline bucket buffer, and the |
1138 | // 'storage' is a POD containing a char buffer. |
1139 | return reinterpret_cast<const BucketT *>(&storage); |
1140 | } |
1141 | |
1142 | BucketT *getInlineBuckets() { |
1143 | return const_cast<BucketT *>( |
1144 | const_cast<const SmallDenseMap *>(this)->getInlineBuckets()); |
1145 | } |
1146 | |
1147 | const LargeRep *getLargeRep() const { |
1148 | assert(!Small); |
1149 | // Note, same rule about aliasing as with getInlineBuckets. |
1150 | return reinterpret_cast<const LargeRep *>(&storage); |
1151 | } |
1152 | |
1153 | LargeRep *getLargeRep() { |
1154 | return const_cast<LargeRep *>( |
1155 | const_cast<const SmallDenseMap *>(this)->getLargeRep()); |
1156 | } |
1157 | |
1158 | const BucketT *getBuckets() const { |
1159 | return Small ? getInlineBuckets() : getLargeRep()->Buckets; |
1160 | } |
1161 | |
1162 | BucketT *getBuckets() { |
1163 | return const_cast<BucketT *>( |
1164 | const_cast<const SmallDenseMap *>(this)->getBuckets()); |
1165 | } |
1166 | |
1167 | unsigned getNumBuckets() const { |
1168 | return Small ? InlineBuckets : getLargeRep()->NumBuckets; |
1169 | } |
1170 | |
1171 | void deallocateBuckets() { |
1172 | if (Small) |
1173 | return; |
1174 | |
1175 | deallocate_buffer(getLargeRep()->Buckets, |
1176 | sizeof(BucketT) * getLargeRep()->NumBuckets, |
1177 | alignof(BucketT)); |
1178 | getLargeRep()->~LargeRep(); |
1179 | } |
1180 | |
1181 | LargeRep allocateBuckets(unsigned Num) { |
1182 | assert(Num > InlineBuckets && "Must allocate more buckets than are inline"); |
1183 | LargeRep Rep = {static_cast<BucketT *>(allocate_buffer( |
1184 | sizeof(BucketT) * Num, alignof(BucketT))), |
1185 | Num}; |
1186 | return Rep; |
1187 | } |
1188 | }; |
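// --- Editorial illustration (not part of this header) ----------------------
// A usage sketch for SmallDenseMap: with 4 inline buckets the first entries
// live in the in-object storage, and inserting past the load-factor
// threshold transparently moves the table to a heap-allocated large rep.
// The function name is hypothetical.
inline unsigned smallDenseMapUsageSketch() {
  SmallDenseMap<unsigned, unsigned, 4> M; // 4 buckets stored inline
  for (unsigned I = 0; I < 16; ++I)       // forces a switch to the large rep
    M[I] = I * 2;                         // operator[] default-constructs slots

  unsigned Hits = 0;
  for (unsigned I = 0; I < 16; ++I)
    Hits += M.count(I);                   // count() is 0 or 1 per key
  return Hits;                            // expected to be 16
}
// ----------------------------------------------------------------------------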
1189 | |
1190 | template <typename KeyT, typename ValueT, typename KeyInfoT, typename Bucket, |
1191 | bool IsConst> |
1192 | class DenseMapIterator : DebugEpochBase::HandleBase { |
1193 | friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true>; |
1194 | friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, false>; |
1195 | |
1196 | public: |
1197 | using difference_type = ptrdiff_t; |
1198 | using value_type = |
1199 | typename std::conditional<IsConst, const Bucket, Bucket>::type; |
1200 | using pointer = value_type *; |
1201 | using reference = value_type &; |
1202 | using iterator_category = std::forward_iterator_tag; |
1203 | |
1204 | private: |
1205 | pointer Ptr = nullptr; |
1206 | pointer End = nullptr; |
1207 | |
1208 | public: |
1209 | DenseMapIterator() = default; |
1210 | |
1211 | DenseMapIterator(pointer Pos, pointer E, const DebugEpochBase &Epoch, |
1212 | bool NoAdvance = false) |
1213 | : DebugEpochBase::HandleBase(&Epoch), Ptr(Pos), End(E) { |
1214 | assert(isHandleInSync() && "invalid construction!")((void)0); |
1215 | |
1216 | if (NoAdvance) return; |
1217 | if (shouldReverseIterate<KeyT>()) { |
1218 | RetreatPastEmptyBuckets(); |
1219 | return; |
1220 | } |
1221 | AdvancePastEmptyBuckets(); |
1222 | } |
1223 | |
1224 | // Converting ctor from non-const iterators to const iterators. SFINAE'd out |
1225 | // for const iterator destinations so it doesn't end up as a user defined copy |
1226 | // constructor. |
1227 | template <bool IsConstSrc, |
1228 | typename = std::enable_if_t<!IsConstSrc && IsConst>> |
1229 | DenseMapIterator( |
1230 | const DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, IsConstSrc> &I) |
1231 | : DebugEpochBase::HandleBase(I), Ptr(I.Ptr), End(I.End) {} |
1232 | |
1233 | reference operator*() const { |
1234 | assert(isHandleInSync() && "invalid iterator access!"); |
1235 | assert(Ptr != End && "dereferencing end() iterator"); |
1236 | if (shouldReverseIterate<KeyT>()) |
1237 | return Ptr[-1]; |
1238 | return *Ptr; |
1239 | } |
1240 | pointer operator->() const { |
1241 | assert(isHandleInSync() && "invalid iterator access!"); |
1242 | assert(Ptr != End && "dereferencing end() iterator"); |
1243 | if (shouldReverseIterate<KeyT>()) |
1244 | return &(Ptr[-1]); |
1245 | return Ptr; |
1246 | } |
1247 | |
1248 | friend bool operator==(const DenseMapIterator &LHS, |
1249 | const DenseMapIterator &RHS) { |
1250 | assert((!LHS.Ptr || LHS.isHandleInSync()) && "handle not in sync!"); |
1251 | assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!"); |
1252 | assert(LHS.getEpochAddress() == RHS.getEpochAddress() && |
1253 |        "comparing incomparable iterators!"); |
1254 | return LHS.Ptr == RHS.Ptr; |
1255 | } |
1256 | |
1257 | friend bool operator!=(const DenseMapIterator &LHS, |
1258 | const DenseMapIterator &RHS) { |
1259 | return !(LHS == RHS); |
1260 | } |
1261 | |
1262 | inline DenseMapIterator& operator++() { // Preincrement |
1263 | assert(isHandleInSync() && "invalid iterator access!"); |
1264 | assert(Ptr != End && "incrementing end() iterator"); |
1265 | if (shouldReverseIterate<KeyT>()) { |
1266 | --Ptr; |
1267 | RetreatPastEmptyBuckets(); |
1268 | return *this; |
1269 | } |
1270 | ++Ptr; |
1271 | AdvancePastEmptyBuckets(); |
1272 | return *this; |
1273 | } |
1274 | DenseMapIterator operator++(int) { // Postincrement |
1275 | assert(isHandleInSync() && "invalid iterator access!"); |
1276 | DenseMapIterator tmp = *this; ++*this; return tmp; |
1277 | } |
1278 | |
1279 | private: |
1280 | void AdvancePastEmptyBuckets() { |
1281 | assert(Ptr <= End); |
1282 | const KeyT Empty = KeyInfoT::getEmptyKey(); |
1283 | const KeyT Tombstone = KeyInfoT::getTombstoneKey(); |
1284 | |
1285 | while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) || |
1286 | KeyInfoT::isEqual(Ptr->getFirst(), Tombstone))) |
1287 | ++Ptr; |
1288 | } |
1289 | |
1290 | void RetreatPastEmptyBuckets() { |
1291 | assert(Ptr >= End); |
1292 | const KeyT Empty = KeyInfoT::getEmptyKey(); |
1293 | const KeyT Tombstone = KeyInfoT::getTombstoneKey(); |
1294 | |
1295 | while (Ptr != End && (KeyInfoT::isEqual(Ptr[-1].getFirst(), Empty) || |
1296 | KeyInfoT::isEqual(Ptr[-1].getFirst(), Tombstone))) |
1297 | --Ptr; |
1298 | } |
1299 | }; |
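// --- Editorial illustration (not part of this header) ----------------------
// DenseMapIterator walks the raw bucket array and skips empty and tombstone
// buckets, so iteration order is unspecified and unrelated to insertion
// order. A hedged sketch of summing values with a range-for (the function
// name is hypothetical; begin()/end() come from DenseMapBase):
inline int denseMapIterationSketch(const DenseMap<int, int> &M) {
  int Sum = 0;
  for (const auto &KV : M)   // const_iterator advances past empty/tombstone
    Sum += KV.second;        // only occupied buckets are visited
  return Sum;
}
// ----------------------------------------------------------------------------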
1300 | |
1301 | template <typename KeyT, typename ValueT, typename KeyInfoT> |
1302 | inline size_t capacity_in_bytes(const DenseMap<KeyT, ValueT, KeyInfoT> &X) { |
1303 | return X.getMemorySize(); |
1304 | } |
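// --- Editorial illustration (not part of this header) ----------------------
// capacity_in_bytes simply forwards to getMemorySize(), i.e. the bucket
// array only (number of buckets * sizeof(BucketT)); heap memory owned by the
// keys or values themselves is not included. The function name below is
// hypothetical.
inline size_t denseMapFootprintSketch(const DenseMap<int, int> &M) {
  return capacity_in_bytes(M); // raw bucket storage of the map
}
// ----------------------------------------------------------------------------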
1305 | |
1306 | } // end namespace llvm |
1307 | |
1308 | #endif // LLVM_ADT_DENSEMAP_H |