| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Analysis/VectorUtils.cpp |
| Warning: | line 1180, column 11: Called C++ object pointer is null |
| 1 | //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===// | ||||||||||
| 2 | // | ||||||||||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||
| 6 | // | ||||||||||
| 7 | //===----------------------------------------------------------------------===// | ||||||||||
| 8 | // | ||||||||||
| 9 | // This file defines vectorizer utilities. | ||||||||||
| 10 | // | ||||||||||
| 11 | //===----------------------------------------------------------------------===// | ||||||||||
| 12 | |||||||||||
| 13 | #include "llvm/Analysis/VectorUtils.h" | ||||||||||
| 14 | #include "llvm/ADT/EquivalenceClasses.h" | ||||||||||
| 15 | #include "llvm/Analysis/DemandedBits.h" | ||||||||||
| 16 | #include "llvm/Analysis/LoopInfo.h" | ||||||||||
| 17 | #include "llvm/Analysis/LoopIterator.h" | ||||||||||
| 18 | #include "llvm/Analysis/ScalarEvolution.h" | ||||||||||
| 19 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||||||
| 20 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||||
| 21 | #include "llvm/Analysis/ValueTracking.h" | ||||||||||
| 22 | #include "llvm/IR/Constants.h" | ||||||||||
| 23 | #include "llvm/IR/GetElementPtrTypeIterator.h" | ||||||||||
| 24 | #include "llvm/IR/IRBuilder.h" | ||||||||||
| 25 | #include "llvm/IR/PatternMatch.h" | ||||||||||
| 26 | #include "llvm/IR/Value.h" | ||||||||||
| 27 | #include "llvm/Support/CommandLine.h" | ||||||||||
| 28 | |||||||||||
| 29 | #define DEBUG_TYPE"vectorutils" "vectorutils" | ||||||||||
| 30 | |||||||||||
| 31 | using namespace llvm; | ||||||||||
| 32 | using namespace llvm::PatternMatch; | ||||||||||
| 33 | |||||||||||
| 34 | /// Maximum factor for an interleaved memory access. | ||||||||||
| 35 | static cl::opt<unsigned> MaxInterleaveGroupFactor( | ||||||||||
| 36 | "max-interleave-group-factor", cl::Hidden, | ||||||||||
| 37 | cl::desc("Maximum factor for an interleaved access group (default = 8)"), | ||||||||||
| 38 | cl::init(8)); | ||||||||||
| 39 | |||||||||||
| 40 | /// Return true if all of the intrinsic's arguments and return type are scalars | ||||||||||
| 41 | /// for the scalar form of the intrinsic, and vectors for the vector form of the | ||||||||||
| 42 | /// intrinsic (except operands that are marked as always being scalar by | ||||||||||
| 43 | /// hasVectorInstrinsicScalarOpd). | ||||||||||
| 44 | bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { | ||||||||||
| 45 | switch (ID) { | ||||||||||
| 46 | case Intrinsic::abs: // Begin integer bit-manipulation. | ||||||||||
| 47 | case Intrinsic::bswap: | ||||||||||
| 48 | case Intrinsic::bitreverse: | ||||||||||
| 49 | case Intrinsic::ctpop: | ||||||||||
| 50 | case Intrinsic::ctlz: | ||||||||||
| 51 | case Intrinsic::cttz: | ||||||||||
| 52 | case Intrinsic::fshl: | ||||||||||
| 53 | case Intrinsic::fshr: | ||||||||||
| 54 | case Intrinsic::smax: | ||||||||||
| 55 | case Intrinsic::smin: | ||||||||||
| 56 | case Intrinsic::umax: | ||||||||||
| 57 | case Intrinsic::umin: | ||||||||||
| 58 | case Intrinsic::sadd_sat: | ||||||||||
| 59 | case Intrinsic::ssub_sat: | ||||||||||
| 60 | case Intrinsic::uadd_sat: | ||||||||||
| 61 | case Intrinsic::usub_sat: | ||||||||||
| 62 | case Intrinsic::smul_fix: | ||||||||||
| 63 | case Intrinsic::smul_fix_sat: | ||||||||||
| 64 | case Intrinsic::umul_fix: | ||||||||||
| 65 | case Intrinsic::umul_fix_sat: | ||||||||||
| 66 | case Intrinsic::sqrt: // Begin floating-point. | ||||||||||
| 67 | case Intrinsic::sin: | ||||||||||
| 68 | case Intrinsic::cos: | ||||||||||
| 69 | case Intrinsic::exp: | ||||||||||
| 70 | case Intrinsic::exp2: | ||||||||||
| 71 | case Intrinsic::log: | ||||||||||
| 72 | case Intrinsic::log10: | ||||||||||
| 73 | case Intrinsic::log2: | ||||||||||
| 74 | case Intrinsic::fabs: | ||||||||||
| 75 | case Intrinsic::minnum: | ||||||||||
| 76 | case Intrinsic::maxnum: | ||||||||||
| 77 | case Intrinsic::minimum: | ||||||||||
| 78 | case Intrinsic::maximum: | ||||||||||
| 79 | case Intrinsic::copysign: | ||||||||||
| 80 | case Intrinsic::floor: | ||||||||||
| 81 | case Intrinsic::ceil: | ||||||||||
| 82 | case Intrinsic::trunc: | ||||||||||
| 83 | case Intrinsic::rint: | ||||||||||
| 84 | case Intrinsic::nearbyint: | ||||||||||
| 85 | case Intrinsic::round: | ||||||||||
| 86 | case Intrinsic::roundeven: | ||||||||||
| 87 | case Intrinsic::pow: | ||||||||||
| 88 | case Intrinsic::fma: | ||||||||||
| 89 | case Intrinsic::fmuladd: | ||||||||||
| 90 | case Intrinsic::powi: | ||||||||||
| 91 | case Intrinsic::canonicalize: | ||||||||||
| 92 | return true; | ||||||||||
| 93 | default: | ||||||||||
| 94 | return false; | ||||||||||
| 95 | } | ||||||||||
| 96 | } | ||||||||||
| 97 | |||||||||||
| 98 | /// Identifies if the vector form of the intrinsic has a scalar operand. | ||||||||||
| 99 | bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, | ||||||||||
| 100 | unsigned ScalarOpdIdx) { | ||||||||||
| 101 | switch (ID) { | ||||||||||
| 102 | case Intrinsic::abs: | ||||||||||
| 103 | case Intrinsic::ctlz: | ||||||||||
| 104 | case Intrinsic::cttz: | ||||||||||
| 105 | case Intrinsic::powi: | ||||||||||
| 106 | return (ScalarOpdIdx == 1); | ||||||||||
| 107 | case Intrinsic::smul_fix: | ||||||||||
| 108 | case Intrinsic::smul_fix_sat: | ||||||||||
| 109 | case Intrinsic::umul_fix: | ||||||||||
| 110 | case Intrinsic::umul_fix_sat: | ||||||||||
| 111 | return (ScalarOpdIdx == 2); | ||||||||||
| 112 | default: | ||||||||||
| 113 | return false; | ||||||||||
| 114 | } | ||||||||||
| 115 | } | ||||||||||
| 116 | |||||||||||
| 117 | bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, | ||||||||||
| 118 | unsigned ScalarOpdIdx) { | ||||||||||
| 119 | switch (ID) { | ||||||||||
| 120 | case Intrinsic::powi: | ||||||||||
| 121 | return (ScalarOpdIdx == 1); | ||||||||||
| 122 | default: | ||||||||||
| 123 | return false; | ||||||||||
| 124 | } | ||||||||||
| 125 | } | ||||||||||
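A short usage sketch of the three queries above, using llvm.powi; the hypothetical helper below is illustration only, relies solely on declarations already visible in this file, and its commented results follow directly from the switch statements above:

    // Illustrative sketch: what a vectorizer learns about llvm.powi from the queries above.
    static void queryPowiShape() {
      bool Vectorizable = isTriviallyVectorizable(Intrinsic::powi);           // true
      bool ExpStaysScalar = hasVectorInstrinsicScalarOpd(Intrinsic::powi, 1); // true: the exponent stays scalar
      bool ExpOverloaded =
          hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::powi, 1);         // true: it also participates in overloading
      (void)Vectorizable; (void)ExpStaysScalar; (void)ExpOverloaded;
    }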
| 126 | |||||||||||
| 127 | /// Returns intrinsic ID for call. | ||||||||||
| 128 | /// For the input call instruction it finds the mapping intrinsic and returns | ||||||||||
| 129 | /// its ID; if no mapping is found, it returns not_intrinsic. | ||||||||||
| 130 | Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI, | ||||||||||
| 131 | const TargetLibraryInfo *TLI) { | ||||||||||
| 132 | Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI); | ||||||||||
| 133 | if (ID == Intrinsic::not_intrinsic) | ||||||||||
| 134 | return Intrinsic::not_intrinsic; | ||||||||||
| 135 | |||||||||||
| 136 | if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || | ||||||||||
| 137 | ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || | ||||||||||
| 138 | ID == Intrinsic::experimental_noalias_scope_decl || | ||||||||||
| 139 | ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe) | ||||||||||
| 140 | return ID; | ||||||||||
| 141 | return Intrinsic::not_intrinsic; | ||||||||||
| 142 | } | ||||||||||
| 143 | |||||||||||
| 144 | /// Find the operand of the GEP that should be checked for consecutive | ||||||||||
| 145 | /// stores. This ignores trailing indices that have no effect on the final | ||||||||||
| 146 | /// pointer. | ||||||||||
| 147 | unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { | ||||||||||
| 148 | const DataLayout &DL = Gep->getModule()->getDataLayout(); | ||||||||||
| 149 | unsigned LastOperand = Gep->getNumOperands() - 1; | ||||||||||
| 150 | TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType()); | ||||||||||
| 151 | |||||||||||
| 152 | // Walk backwards and try to peel off zeros. | ||||||||||
| 153 | while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { | ||||||||||
| 154 | // Find the type we're currently indexing into. | ||||||||||
| 155 | gep_type_iterator GEPTI = gep_type_begin(Gep); | ||||||||||
| 156 | std::advance(GEPTI, LastOperand - 2); | ||||||||||
| 157 | |||||||||||
| 158 | // If it's a type with the same allocation size as the result of the GEP we | ||||||||||
| 159 | // can peel off the zero index. | ||||||||||
| 160 | if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize) | ||||||||||
| 161 | break; | ||||||||||
| 162 | --LastOperand; | ||||||||||
| 163 | } | ||||||||||
| 164 | |||||||||||
| 165 | return LastOperand; | ||||||||||
| 166 | } | ||||||||||
| 167 | |||||||||||
| 168 | /// If the argument is a GEP, then returns the operand identified by | ||||||||||
| 169 | /// getGEPInductionOperand. However, if there is some other non-loop-invariant | ||||||||||
| 170 | /// operand, it returns that instead. | ||||||||||
| 171 | Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { | ||||||||||
| 172 | GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); | ||||||||||
| 173 | if (!GEP) | ||||||||||
| 174 | return Ptr; | ||||||||||
| 175 | |||||||||||
| 176 | unsigned InductionOperand = getGEPInductionOperand(GEP); | ||||||||||
| 177 | |||||||||||
| 178 | // Check that all of the gep indices are uniform except for our induction | ||||||||||
| 179 | // operand. | ||||||||||
| 180 | for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) | ||||||||||
| 181 | if (i != InductionOperand && | ||||||||||
| 182 | !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) | ||||||||||
| 183 | return Ptr; | ||||||||||
| 184 | return GEP->getOperand(InductionOperand); | ||||||||||
| 185 | } | ||||||||||
| 186 | |||||||||||
| 187 | /// If a value has only one user that is a CastInst, return it. | ||||||||||
| 188 | Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { | ||||||||||
| 189 | Value *UniqueCast = nullptr; | ||||||||||
| 190 | for (User *U : Ptr->users()) { | ||||||||||
| 191 | CastInst *CI = dyn_cast<CastInst>(U); | ||||||||||
| 192 | if (CI && CI->getType() == Ty) { | ||||||||||
| 193 | if (!UniqueCast) | ||||||||||
| 194 | UniqueCast = CI; | ||||||||||
| 195 | else | ||||||||||
| 196 | return nullptr; | ||||||||||
| 197 | } | ||||||||||
| 198 | } | ||||||||||
| 199 | return UniqueCast; | ||||||||||
| 200 | } | ||||||||||
| 201 | |||||||||||
| 202 | /// Get the stride of a pointer access in a loop. Looks for symbolic | ||||||||||
| 203 | /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. | ||||||||||
| 204 | Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { | ||||||||||
| 205 | auto *PtrTy = dyn_cast<PointerType>(Ptr->getType()); | ||||||||||
| 206 | if (!PtrTy || PtrTy->isAggregateType()) | ||||||||||
| 207 | return nullptr; | ||||||||||
| 208 | |||||||||||
| 209 | // Try to remove a gep instruction to make the pointer (actually index at this | ||||||||||
| 210 | // point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing the | ||||||||||
| 211 | // pointer, otherwise, we are analyzing the index. | ||||||||||
| 212 | Value *OrigPtr = Ptr; | ||||||||||
| 213 | |||||||||||
| 214 | // The size of the pointer access. | ||||||||||
| 215 | int64_t PtrAccessSize = 1; | ||||||||||
| 216 | |||||||||||
| 217 | Ptr = stripGetElementPtr(Ptr, SE, Lp); | ||||||||||
| 218 | const SCEV *V = SE->getSCEV(Ptr); | ||||||||||
| 219 | |||||||||||
| 220 | if (Ptr != OrigPtr) | ||||||||||
| 221 | // Strip off casts. | ||||||||||
| 222 | while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) | ||||||||||
| 223 | V = C->getOperand(); | ||||||||||
| 224 | |||||||||||
| 225 | const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); | ||||||||||
| 226 | if (!S) | ||||||||||
| 227 | return nullptr; | ||||||||||
| 228 | |||||||||||
| 229 | V = S->getStepRecurrence(*SE); | ||||||||||
| 230 | if (!V) | ||||||||||
| 231 | return nullptr; | ||||||||||
| 232 | |||||||||||
| 233 | // Strip off the size of access multiplication if we are still analyzing the | ||||||||||
| 234 | // pointer. | ||||||||||
| 235 | if (OrigPtr == Ptr) { | ||||||||||
| 236 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { | ||||||||||
| 237 | if (M->getOperand(0)->getSCEVType() != scConstant) | ||||||||||
| 238 | return nullptr; | ||||||||||
| 239 | |||||||||||
| 240 | const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt(); | ||||||||||
| 241 | |||||||||||
| 242 | // Huge step value - give up. | ||||||||||
| 243 | if (APStepVal.getBitWidth() > 64) | ||||||||||
| 244 | return nullptr; | ||||||||||
| 245 | |||||||||||
| 246 | int64_t StepVal = APStepVal.getSExtValue(); | ||||||||||
| 247 | if (PtrAccessSize != StepVal) | ||||||||||
| 248 | return nullptr; | ||||||||||
| 249 | V = M->getOperand(1); | ||||||||||
| 250 | } | ||||||||||
| 251 | } | ||||||||||
| 252 | |||||||||||
| 253 | // Strip off casts. | ||||||||||
| 254 | Type *StripedOffRecurrenceCast = nullptr; | ||||||||||
| 255 | if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) { | ||||||||||
| 256 | StripedOffRecurrenceCast = C->getType(); | ||||||||||
| 257 | V = C->getOperand(); | ||||||||||
| 258 | } | ||||||||||
| 259 | |||||||||||
| 260 | // Look for the loop invariant symbolic value. | ||||||||||
| 261 | const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V); | ||||||||||
| 262 | if (!U) | ||||||||||
| 263 | return nullptr; | ||||||||||
| 264 | |||||||||||
| 265 | Value *Stride = U->getValue(); | ||||||||||
| 266 | if (!Lp->isLoopInvariant(Stride)) | ||||||||||
| 267 | return nullptr; | ||||||||||
| 268 | |||||||||||
| 269 | // If we have stripped off the recurrence cast we have to make sure that we | ||||||||||
| 270 | // return the value that is used in this loop so that we can replace it later. | ||||||||||
| 271 | if (StripedOffRecurrenceCast) | ||||||||||
| 272 | Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast); | ||||||||||
| 273 | |||||||||||
| 274 | return Stride; | ||||||||||
| 275 | } | ||||||||||
| 276 | |||||||||||
| 277 | /// Given a vector and an element number, see if the scalar value is | ||||||||||
| 278 | /// already around as a register, for example if it were inserted then extracted | ||||||||||
| 279 | /// from the vector. | ||||||||||
| 280 | Value *llvm::findScalarElement(Value *V, unsigned EltNo) { | ||||||||||
| 281 | assert(V->getType()->isVectorTy() && "Not looking at a vector?"); | ||||||||||
| 282 | VectorType *VTy = cast<VectorType>(V->getType()); | ||||||||||
| 283 | // For fixed-length vectors, return undef for an out-of-range access. | ||||||||||
| 284 | if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { | ||||||||||
| 285 | unsigned Width = FVTy->getNumElements(); | ||||||||||
| 286 | if (EltNo >= Width) | ||||||||||
| 287 | return UndefValue::get(FVTy->getElementType()); | ||||||||||
| 288 | } | ||||||||||
| 289 | |||||||||||
| 290 | if (Constant *C = dyn_cast<Constant>(V)) | ||||||||||
| 291 | return C->getAggregateElement(EltNo); | ||||||||||
| 292 | |||||||||||
| 293 | if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { | ||||||||||
| 294 | // If this is an insert to a variable element, we don't know what it is. | ||||||||||
| 295 | if (!isa<ConstantInt>(III->getOperand(2))) | ||||||||||
| 296 | return nullptr; | ||||||||||
| 297 | unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); | ||||||||||
| 298 | |||||||||||
| 299 | // If this is an insert to the element we are looking for, return the | ||||||||||
| 300 | // inserted value. | ||||||||||
| 301 | if (EltNo == IIElt) | ||||||||||
| 302 | return III->getOperand(1); | ||||||||||
| 303 | |||||||||||
| 304 | // Guard against infinite loop on malformed, unreachable IR. | ||||||||||
| 305 | if (III == III->getOperand(0)) | ||||||||||
| 306 | return nullptr; | ||||||||||
| 307 | |||||||||||
| 308 | // Otherwise, the insertelement doesn't modify the value, recurse on its | ||||||||||
| 309 | // vector input. | ||||||||||
| 310 | return findScalarElement(III->getOperand(0), EltNo); | ||||||||||
| 311 | } | ||||||||||
| 312 | |||||||||||
| 313 | ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V); | ||||||||||
| 314 | // Restrict the following transformation to fixed-length vector. | ||||||||||
| 315 | if (SVI && isa<FixedVectorType>(SVI->getType())) { | ||||||||||
| 316 | unsigned LHSWidth = | ||||||||||
| 317 | cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements(); | ||||||||||
| 318 | int InEl = SVI->getMaskValue(EltNo); | ||||||||||
| 319 | if (InEl < 0) | ||||||||||
| 320 | return UndefValue::get(VTy->getElementType()); | ||||||||||
| 321 | if (InEl < (int)LHSWidth) | ||||||||||
| 322 | return findScalarElement(SVI->getOperand(0), InEl); | ||||||||||
| 323 | return findScalarElement(SVI->getOperand(1), InEl - LHSWidth); | ||||||||||
| 324 | } | ||||||||||
| 325 | |||||||||||
| 326 | // Extract a value from a vector add operation with a constant zero. | ||||||||||
| 327 | // TODO: Use getBinOpIdentity() to generalize this. | ||||||||||
| 328 | Value *Val; Constant *C; | ||||||||||
| 329 | if (match(V, m_Add(m_Value(Val), m_Constant(C)))) | ||||||||||
| 330 | if (Constant *Elt = C->getAggregateElement(EltNo)) | ||||||||||
| 331 | if (Elt->isNullValue()) | ||||||||||
| 332 | return findScalarElement(Val, EltNo); | ||||||||||
| 333 | |||||||||||
| 334 | // Otherwise, we don't know. | ||||||||||
| 335 | return nullptr; | ||||||||||
| 336 | } | ||||||||||
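A minimal sketch of the insert-then-extract case described in the comment above; the helper below is hypothetical illustration only, builds a free-standing function with IRBuilder (relying on headers this file already pulls in), and the commented results follow from the insertelement handling above:

    // Illustrative sketch: findScalarElement looks through an insertelement at a known index.
    static void findScalarElementSketch() {
      LLVMContext Ctx;
      auto *I32 = Type::getInt32Ty(Ctx);
      auto *VecTy = FixedVectorType::get(I32, 4);
      auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), {VecTy, I32}, false);
      Function *F = Function::Create(FTy, Function::ExternalLinkage, "sketch");
      BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
      IRBuilder<> B(BB);
      Value *Vec = F->getArg(0), *Scalar = F->getArg(1);
      Value *Ins = B.CreateInsertElement(Vec, Scalar, uint64_t(2));
      Value *At2 = findScalarElement(Ins, 2); // the inserted Scalar
      Value *At1 = findScalarElement(Ins, 1); // recurses into the opaque argument -> nullptr
      (void)At2; (void)At1;
    }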
| 337 | |||||||||||
| 338 | int llvm::getSplatIndex(ArrayRef<int> Mask) { | ||||||||||
| 339 | int SplatIndex = -1; | ||||||||||
| 340 | for (int M : Mask) { | ||||||||||
| 341 | // Ignore invalid (undefined) mask elements. | ||||||||||
| 342 | if (M < 0) | ||||||||||
| 343 | continue; | ||||||||||
| 344 | |||||||||||
| 345 | // There can be only 1 non-negative mask element value if this is a splat. | ||||||||||
| 346 | if (SplatIndex != -1 && SplatIndex != M) | ||||||||||
| 347 | return -1; | ||||||||||
| 348 | |||||||||||
| 349 | // Initialize the splat index to the 1st non-negative mask element. | ||||||||||
| 350 | SplatIndex = M; | ||||||||||
| 351 | } | ||||||||||
| 352 | assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?"); | ||||||||||
| 353 | return SplatIndex; | ||||||||||
| 354 | } | ||||||||||
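For intuition, the loop above gives these results on concrete masks; the helper below is illustration only (-1 encodes an undefined mask element):

    // Illustrative sketch: getSplatIndex on concrete shuffle masks.
    static void splatIndexSketch() {
      int AllLane3[] = {3, -1, 3, 3};
      int TwoLanes[] = {0, 1, 0, 1};
      int AllUndef[] = {-1, -1, -1, -1};
      int S0 = getSplatIndex(AllLane3); // 3: every defined element reads lane 3
      int S1 = getSplatIndex(TwoLanes); // -1: two different lanes are read
      int S2 = getSplatIndex(AllUndef); // -1: no defined element at all
      (void)S0; (void)S1; (void)S2;
    }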
| 355 | |||||||||||
| 356 | /// Get splat value if the input is a splat vector or return nullptr. | ||||||||||
| 357 | /// This function is not fully general. It checks only 2 cases: | ||||||||||
| 358 | /// the input value is (1) a splat constant vector or (2) a sequence | ||||||||||
| 359 | /// of instructions that broadcasts a scalar at element 0. | ||||||||||
| 360 | Value *llvm::getSplatValue(const Value *V) { | ||||||||||
| 361 | if (isa<VectorType>(V->getType())) | ||||||||||
| 362 | if (auto *C = dyn_cast<Constant>(V)) | ||||||||||
| 363 | return C->getSplatValue(); | ||||||||||
| 364 | |||||||||||
| 365 | // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...> | ||||||||||
| 366 | Value *Splat; | ||||||||||
| 367 | if (match(V, | ||||||||||
| 368 | m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()), | ||||||||||
| 369 | m_Value(), m_ZeroMask()))) | ||||||||||
| 370 | return Splat; | ||||||||||
| 371 | |||||||||||
| 372 | return nullptr; | ||||||||||
| 373 | } | ||||||||||
| 374 | |||||||||||
| 375 | bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) { | ||||||||||
| 376 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); | ||||||||||
| 377 | |||||||||||
| 378 | if (isa<VectorType>(V->getType())) { | ||||||||||
| 379 | if (isa<UndefValue>(V)) | ||||||||||
| 380 | return true; | ||||||||||
| 381 | // FIXME: We can allow undefs, but if Index was specified, we may want to | ||||||||||
| 382 | // check that the constant is defined at that index. | ||||||||||
| 383 | if (auto *C = dyn_cast<Constant>(V)) | ||||||||||
| 384 | return C->getSplatValue() != nullptr; | ||||||||||
| 385 | } | ||||||||||
| 386 | |||||||||||
| 387 | if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) { | ||||||||||
| 388 | // FIXME: We can safely allow undefs here. If Index was specified, we will | ||||||||||
| 389 | // check that the mask elt is defined at the required index. | ||||||||||
| 390 | if (!is_splat(Shuf->getShuffleMask())) | ||||||||||
| 391 | return false; | ||||||||||
| 392 | |||||||||||
| 393 | // Match any index. | ||||||||||
| 394 | if (Index == -1) | ||||||||||
| 395 | return true; | ||||||||||
| 396 | |||||||||||
| 397 | // Match a specific element. The mask should be defined at and match the | ||||||||||
| 398 | // specified index. | ||||||||||
| 399 | return Shuf->getMaskValue(Index) == Index; | ||||||||||
| 400 | } | ||||||||||
| 401 | |||||||||||
| 402 | // The remaining tests are all recursive, so bail out if we hit the limit. | ||||||||||
| 403 | if (Depth++ == MaxAnalysisRecursionDepth) | ||||||||||
| 404 | return false; | ||||||||||
| 405 | |||||||||||
| 406 | // If both operands of a binop are splats, the result is a splat. | ||||||||||
| 407 | Value *X, *Y, *Z; | ||||||||||
| 408 | if (match(V, m_BinOp(m_Value(X), m_Value(Y)))) | ||||||||||
| 409 | return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth); | ||||||||||
| 410 | |||||||||||
| 411 | // If all operands of a select are splats, the result is a splat. | ||||||||||
| 412 | if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z)))) | ||||||||||
| 413 | return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) && | ||||||||||
| 414 | isSplatValue(Z, Index, Depth); | ||||||||||
| 415 | |||||||||||
| 416 | // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops). | ||||||||||
| 417 | |||||||||||
| 418 | return false; | ||||||||||
| 419 | } | ||||||||||
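The constant path of getSplatValue()/isSplatValue() can be exercised without building any instructions; the hypothetical helper below is a sketch only, using ConstantVector::getSplat from the Constants.h include above:

    // Illustrative sketch: the constant-splat case handled at the top of both functions.
    static void constantSplatSketch() {
      LLVMContext Ctx;
      Constant *Seven = ConstantInt::get(Type::getInt32Ty(Ctx), 7);
      Constant *Vec = ConstantVector::getSplat(ElementCount::getFixed(4), Seven); // <4 x i32> splat of 7
      Value *Scalar = getSplatValue(Vec); // the i32 7 constant
      bool Splat = isSplatValue(Vec);     // true, via Constant::getSplatValue()
      (void)Scalar; (void)Splat;
    }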
| 420 | |||||||||||
| 421 | void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, | ||||||||||
| 422 | SmallVectorImpl<int> &ScaledMask) { | ||||||||||
| 423 | assert(Scale > 0 && "Unexpected scaling factor"); | ||||||||||
| 424 | |||||||||||
| 425 | // Fast-path: if no scaling, then it is just a copy. | ||||||||||
| 426 | if (Scale == 1) { | ||||||||||
| 427 | ScaledMask.assign(Mask.begin(), Mask.end()); | ||||||||||
| 428 | return; | ||||||||||
| 429 | } | ||||||||||
| 430 | |||||||||||
| 431 | ScaledMask.clear(); | ||||||||||
| 432 | for (int MaskElt : Mask) { | ||||||||||
| 433 | if (MaskElt >= 0) { | ||||||||||
| 434 | assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX && | ||||||||||
| 435 | "Overflowed 32-bits"); | ||||||||||
| 436 | } | ||||||||||
| 437 | for (int SliceElt = 0; SliceElt != Scale; ++SliceElt) | ||||||||||
| 438 | ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt); | ||||||||||
| 439 | } | ||||||||||
| 440 | } | ||||||||||
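A worked example of the narrowing loop above for Scale = 2 (each source element i becomes the pair 2*i, 2*i+1; negative sentinel elements are simply repeated); the helper is illustration only:

    // Illustrative sketch: narrowShuffleMaskElts with Scale = 2.
    static void narrowMaskSketch() {
      int Wide[] = {0, 2, -1};
      SmallVector<int, 16> Narrow;
      narrowShuffleMaskElts(/*Scale=*/2, Wide, Narrow);
      // Narrow == {0, 1, 4, 5, -1, -1}
    }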
| 441 | |||||||||||
| 442 | bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask, | ||||||||||
| 443 | SmallVectorImpl<int> &ScaledMask) { | ||||||||||
| 444 | assert(Scale > 0 && "Unexpected scaling factor"); | ||||||||||
| 445 | |||||||||||
| 446 | // Fast-path: if no scaling, then it is just a copy. | ||||||||||
| 447 | if (Scale == 1) { | ||||||||||
| 448 | ScaledMask.assign(Mask.begin(), Mask.end()); | ||||||||||
| 449 | return true; | ||||||||||
| 450 | } | ||||||||||
| 451 | |||||||||||
| 452 | // We must map the original elements down evenly to a type with fewer elements. | ||||||||||
| 453 | int NumElts = Mask.size(); | ||||||||||
| 454 | if (NumElts % Scale != 0) | ||||||||||
| 455 | return false; | ||||||||||
| 456 | |||||||||||
| 457 | ScaledMask.clear(); | ||||||||||
| 458 | ScaledMask.reserve(NumElts / Scale); | ||||||||||
| 459 | |||||||||||
| 460 | // Step through the input mask by splitting into Scale-sized slices. | ||||||||||
| 461 | do { | ||||||||||
| 462 | ArrayRef<int> MaskSlice = Mask.take_front(Scale); | ||||||||||
| 463 | assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice."); | ||||||||||
| 464 | |||||||||||
| 465 | // The first element of the slice determines how we evaluate this slice. | ||||||||||
| 466 | int SliceFront = MaskSlice.front(); | ||||||||||
| 467 | if (SliceFront < 0) { | ||||||||||
| 468 | // Negative values (undef or other "sentinel" values) must be equal across | ||||||||||
| 469 | // the entire slice. | ||||||||||
| 470 | if (!is_splat(MaskSlice)) | ||||||||||
| 471 | return false; | ||||||||||
| 472 | ScaledMask.push_back(SliceFront); | ||||||||||
| 473 | } else { | ||||||||||
| 474 | // A positive mask element must be cleanly divisible. | ||||||||||
| 475 | if (SliceFront % Scale != 0) | ||||||||||
| 476 | return false; | ||||||||||
| 477 | // Elements of the slice must be consecutive. | ||||||||||
| 478 | for (int i = 1; i < Scale; ++i) | ||||||||||
| 479 | if (MaskSlice[i] != SliceFront + i) | ||||||||||
| 480 | return false; | ||||||||||
| 481 | ScaledMask.push_back(SliceFront / Scale); | ||||||||||
| 482 | } | ||||||||||
| 483 | Mask = Mask.drop_front(Scale); | ||||||||||
| 484 | } while (!Mask.empty()); | ||||||||||
| 485 | |||||||||||
| 486 | assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask"); | ||||||||||
| 487 | |||||||||||
| 488 | // All elements of the original mask can be scaled down to map to the elements | ||||||||||
| 489 | // of a mask with wider elements. | ||||||||||
| 490 | return true; | ||||||||||
| 491 | } | ||||||||||
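Widening is the partial inverse of narrowing and can fail; a sketch of both outcomes (the helper is illustration only, and the results follow from the slice checks above):

    // Illustrative sketch: widenShuffleMaskElts with Scale = 2.
    static void widenMaskSketch() {
      int Consecutive[] = {0, 1, 4, 5};
      int Strided[]     = {0, 2, 4, 6};
      SmallVector<int, 16> WideOK, WideBad;
      bool OK  = widenShuffleMaskElts(/*Scale=*/2, Consecutive, WideOK); // true, WideOK == {0, 2}
      bool Bad = widenShuffleMaskElts(/*Scale=*/2, Strided, WideBad);    // false: slice {0, 2} is not consecutive
      (void)OK; (void)Bad;
    }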
| 492 | |||||||||||
| 493 | MapVector<Instruction *, uint64_t> | ||||||||||
| 494 | llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB, | ||||||||||
| 495 | const TargetTransformInfo *TTI) { | ||||||||||
| 496 | |||||||||||
| 497 | // DemandedBits will give us every value's live-out bits. But we want | ||||||||||
| 498 | // to ensure no extra casts would need to be inserted, so every DAG | ||||||||||
| 499 | // of connected values must have the same minimum bitwidth. | ||||||||||
| 500 | EquivalenceClasses<Value *> ECs; | ||||||||||
| 501 | SmallVector<Value *, 16> Worklist; | ||||||||||
| 502 | SmallPtrSet<Value *, 4> Roots; | ||||||||||
| 503 | SmallPtrSet<Value *, 16> Visited; | ||||||||||
| 504 | DenseMap<Value *, uint64_t> DBits; | ||||||||||
| 505 | SmallPtrSet<Instruction *, 4> InstructionSet; | ||||||||||
| 506 | MapVector<Instruction *, uint64_t> MinBWs; | ||||||||||
| 507 | |||||||||||
| 508 | // Determine the roots. We work bottom-up, from truncs or icmps. | ||||||||||
| 509 | bool SeenExtFromIllegalType = false; | ||||||||||
| 510 | for (auto *BB : Blocks) | ||||||||||
| 511 | for (auto &I : *BB) { | ||||||||||
| 512 | InstructionSet.insert(&I); | ||||||||||
| 513 | |||||||||||
| 514 | if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) && | ||||||||||
| 515 | !TTI->isTypeLegal(I.getOperand(0)->getType())) | ||||||||||
| 516 | SeenExtFromIllegalType = true; | ||||||||||
| 517 | |||||||||||
| 518 | // Only deal with non-vector integers up to 64-bits wide. | ||||||||||
| 519 | if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) && | ||||||||||
| 520 | !I.getType()->isVectorTy() && | ||||||||||
| 521 | I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) { | ||||||||||
| 522 | // Don't make work for ourselves. If we know the loaded type is legal, | ||||||||||
| 523 | // don't add it to the worklist. | ||||||||||
| 524 | if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType())) | ||||||||||
| 525 | continue; | ||||||||||
| 526 | |||||||||||
| 527 | Worklist.push_back(&I); | ||||||||||
| 528 | Roots.insert(&I); | ||||||||||
| 529 | } | ||||||||||
| 530 | } | ||||||||||
| 531 | // Early exit. | ||||||||||
| 532 | if (Worklist.empty() || (TTI && !SeenExtFromIllegalType)) | ||||||||||
| 533 | return MinBWs; | ||||||||||
| 534 | |||||||||||
| 535 | // Now proceed breadth-first, unioning values together. | ||||||||||
| 536 | while (!Worklist.empty()) { | ||||||||||
| 537 | Value *Val = Worklist.pop_back_val(); | ||||||||||
| 538 | Value *Leader = ECs.getOrInsertLeaderValue(Val); | ||||||||||
| 539 | |||||||||||
| 540 | if (Visited.count(Val)) | ||||||||||
| 541 | continue; | ||||||||||
| 542 | Visited.insert(Val); | ||||||||||
| 543 | |||||||||||
| 544 | // Non-instructions terminate a chain successfully. | ||||||||||
| 545 | if (!isa<Instruction>(Val)) | ||||||||||
| 546 | continue; | ||||||||||
| 547 | Instruction *I = cast<Instruction>(Val); | ||||||||||
| 548 | |||||||||||
| 549 | // If we encounter a type that is larger than 64 bits, we can't represent | ||||||||||
| 550 | // it so bail out. | ||||||||||
| 551 | if (DB.getDemandedBits(I).getBitWidth() > 64) | ||||||||||
| 552 | return MapVector<Instruction *, uint64_t>(); | ||||||||||
| 553 | |||||||||||
| 554 | uint64_t V = DB.getDemandedBits(I).getZExtValue(); | ||||||||||
| 555 | DBits[Leader] |= V; | ||||||||||
| 556 | DBits[I] = V; | ||||||||||
| 557 | |||||||||||
| 558 | // Casts, loads and instructions outside of our range terminate a chain | ||||||||||
| 559 | // successfully. | ||||||||||
| 560 | if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) || | ||||||||||
| 561 | !InstructionSet.count(I)) | ||||||||||
| 562 | continue; | ||||||||||
| 563 | |||||||||||
| 564 | // Unsafe casts terminate a chain unsuccessfully. We can't do anything | ||||||||||
| 565 | // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to | ||||||||||
| 566 | // transform anything that relies on them. | ||||||||||
| 567 | if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) || | ||||||||||
| 568 | !I->getType()->isIntegerTy()) { | ||||||||||
| 569 | DBits[Leader] |= ~0ULL; | ||||||||||
| 570 | continue; | ||||||||||
| 571 | } | ||||||||||
| 572 | |||||||||||
| 573 | // We don't modify the types of PHIs. Reductions will already have been | ||||||||||
| 574 | // truncated if possible, and inductions' sizes will have been chosen by | ||||||||||
| 575 | // indvars. | ||||||||||
| 576 | if (isa<PHINode>(I)) | ||||||||||
| 577 | continue; | ||||||||||
| 578 | |||||||||||
| 579 | if (DBits[Leader] == ~0ULL) | ||||||||||
| 580 | // All bits demanded, no point continuing. | ||||||||||
| 581 | continue; | ||||||||||
| 582 | |||||||||||
| 583 | for (Value *O : cast<User>(I)->operands()) { | ||||||||||
| 584 | ECs.unionSets(Leader, O); | ||||||||||
| 585 | Worklist.push_back(O); | ||||||||||
| 586 | } | ||||||||||
| 587 | } | ||||||||||
| 588 | |||||||||||
| 589 | // Now we've discovered all values, walk them to see if there are | ||||||||||
| 590 | // any users we didn't see. If there are, we can't optimize that | ||||||||||
| 591 | // chain. | ||||||||||
| 592 | for (auto &I : DBits) | ||||||||||
| 593 | for (auto *U : I.first->users()) | ||||||||||
| 594 | if (U->getType()->isIntegerTy() && DBits.count(U) == 0) | ||||||||||
| 595 | DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL; | ||||||||||
| 596 | |||||||||||
| 597 | for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) { | ||||||||||
| 598 | uint64_t LeaderDemandedBits = 0; | ||||||||||
| 599 | for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) | ||||||||||
| 600 | LeaderDemandedBits |= DBits[M]; | ||||||||||
| 601 | |||||||||||
| 602 | uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) - | ||||||||||
| 603 | llvm::countLeadingZeros(LeaderDemandedBits); | ||||||||||
| 604 | // Round up to a power of 2 | ||||||||||
| 605 | if (!isPowerOf2_64((uint64_t)MinBW)) | ||||||||||
| 606 | MinBW = NextPowerOf2(MinBW); | ||||||||||
| 607 | |||||||||||
| 608 | // We don't modify the types of PHIs. Reductions will already have been | ||||||||||
| 609 | // truncated if possible, and inductions' sizes will have been chosen by | ||||||||||
| 610 | // indvars. | ||||||||||
| 611 | // If we are required to shrink a PHI, abandon this entire equivalence class. | ||||||||||
| 612 | bool Abort = false; | ||||||||||
| 613 | for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) | ||||||||||
| 614 | if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) { | ||||||||||
| 615 | Abort = true; | ||||||||||
| 616 | break; | ||||||||||
| 617 | } | ||||||||||
| 618 | if (Abort) | ||||||||||
| 619 | continue; | ||||||||||
| 620 | |||||||||||
| 621 | for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) { | ||||||||||
| 622 | if (!isa<Instruction>(M)) | ||||||||||
| 623 | continue; | ||||||||||
| 624 | Type *Ty = M->getType(); | ||||||||||
| 625 | if (Roots.count(M)) | ||||||||||
| 626 | Ty = cast<Instruction>(M)->getOperand(0)->getType(); | ||||||||||
| 627 | if (MinBW < Ty->getScalarSizeInBits()) | ||||||||||
| 628 | MinBWs[cast<Instruction>(M)] = MinBW; | ||||||||||
| 629 | } | ||||||||||
| 630 | } | ||||||||||
| 631 | |||||||||||
| 632 | return MinBWs; | ||||||||||
| 633 | } | ||||||||||
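The bit-width rounding used in the loop above, shown on one concrete demanded-bits value; the helper is illustration only (0x1ff has nine live bits, which rounds up to a 16-bit minimum width):

    // Illustrative sketch: the MinBW computation for demanded bits == 0x1ff.
    static void minBitWidthSketch() {
      uint64_t Demanded = 0x1ff;
      uint64_t MinBW = 64 - llvm::countLeadingZeros(Demanded); // 9
      if (!isPowerOf2_64(MinBW))
        MinBW = NextPowerOf2(MinBW);                           // 16
      (void)MinBW;
    }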
| 634 | |||||||||||
| 635 | /// Add all access groups in @p AccGroups to @p List. | ||||||||||
| 636 | template <typename ListT> | ||||||||||
| 637 | static void addToAccessGroupList(ListT &List, MDNode *AccGroups) { | ||||||||||
| 638 | // Interpret an access group as a list containing itself. | ||||||||||
| 639 | if (AccGroups->getNumOperands() == 0) { | ||||||||||
| 640 | assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group"); | ||||||||||
| 641 | List.insert(AccGroups); | ||||||||||
| 642 | return; | ||||||||||
| 643 | } | ||||||||||
| 644 | |||||||||||
| 645 | for (auto &AccGroupListOp : AccGroups->operands()) { | ||||||||||
| 646 | auto *Item = cast<MDNode>(AccGroupListOp.get()); | ||||||||||
| 647 | assert(isValidAsAccessGroup(Item) && "List item must be an access group"); | ||||||||||
| 648 | List.insert(Item); | ||||||||||
| 649 | } | ||||||||||
| 650 | } | ||||||||||
| 651 | |||||||||||
| 652 | MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) { | ||||||||||
| 653 | if (!AccGroups1) | ||||||||||
| 654 | return AccGroups2; | ||||||||||
| 655 | if (!AccGroups2) | ||||||||||
| 656 | return AccGroups1; | ||||||||||
| 657 | if (AccGroups1 == AccGroups2) | ||||||||||
| 658 | return AccGroups1; | ||||||||||
| 659 | |||||||||||
| 660 | SmallSetVector<Metadata *, 4> Union; | ||||||||||
| 661 | addToAccessGroupList(Union, AccGroups1); | ||||||||||
| 662 | addToAccessGroupList(Union, AccGroups2); | ||||||||||
| 663 | |||||||||||
| 664 | if (Union.size() == 0) | ||||||||||
| 665 | return nullptr; | ||||||||||
| 666 | if (Union.size() == 1) | ||||||||||
| 667 | return cast<MDNode>(Union.front()); | ||||||||||
| 668 | |||||||||||
| 669 | LLVMContext &Ctx = AccGroups1->getContext(); | ||||||||||
| 670 | return MDNode::get(Ctx, Union.getArrayRef()); | ||||||||||
| 671 | } | ||||||||||
| 672 | |||||||||||
| 673 | MDNode *llvm::intersectAccessGroups(const Instruction *Inst1, | ||||||||||
| 674 | const Instruction *Inst2) { | ||||||||||
| 675 | bool MayAccessMem1 = Inst1->mayReadOrWriteMemory(); | ||||||||||
| 676 | bool MayAccessMem2 = Inst2->mayReadOrWriteMemory(); | ||||||||||
| 677 | |||||||||||
| 678 | if (!MayAccessMem1 && !MayAccessMem2) | ||||||||||
| 679 | return nullptr; | ||||||||||
| 680 | if (!MayAccessMem1) | ||||||||||
| 681 | return Inst2->getMetadata(LLVMContext::MD_access_group); | ||||||||||
| 682 | if (!MayAccessMem2) | ||||||||||
| 683 | return Inst1->getMetadata(LLVMContext::MD_access_group); | ||||||||||
| 684 | |||||||||||
| 685 | MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group); | ||||||||||
| 686 | MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group); | ||||||||||
| 687 | if (!MD1 || !MD2) | ||||||||||
| 688 | return nullptr; | ||||||||||
| 689 | if (MD1 == MD2) | ||||||||||
| 690 | return MD1; | ||||||||||
| 691 | |||||||||||
| 692 | // Use set for scalable 'contains' check. | ||||||||||
| 693 | SmallPtrSet<Metadata *, 4> AccGroupSet2; | ||||||||||
| 694 | addToAccessGroupList(AccGroupSet2, MD2); | ||||||||||
| 695 | |||||||||||
| 696 | SmallVector<Metadata *, 4> Intersection; | ||||||||||
| 697 | if (MD1->getNumOperands() == 0) { | ||||||||||
| 698 | assert(isValidAsAccessGroup(MD1) && "Node must be an access group"); | ||||||||||
| 699 | if (AccGroupSet2.count(MD1)) | ||||||||||
| 700 | Intersection.push_back(MD1); | ||||||||||
| 701 | } else { | ||||||||||
| 702 | for (const MDOperand &Node : MD1->operands()) { | ||||||||||
| 703 | auto *Item = cast<MDNode>(Node.get()); | ||||||||||
| 704 | assert(isValidAsAccessGroup(Item) && "List item must be an access group"); | ||||||||||
| 705 | if (AccGroupSet2.count(Item)) | ||||||||||
| 706 | Intersection.push_back(Item); | ||||||||||
| 707 | } | ||||||||||
| 708 | } | ||||||||||
| 709 | |||||||||||
| 710 | if (Intersection.size() == 0) | ||||||||||
| 711 | return nullptr; | ||||||||||
| 712 | if (Intersection.size() == 1) | ||||||||||
| 713 | return cast<MDNode>(Intersection.front()); | ||||||||||
| 714 | |||||||||||
| 715 | LLVMContext &Ctx = Inst1->getContext(); | ||||||||||
| 716 | return MDNode::get(Ctx, Intersection); | ||||||||||
| 717 | } | ||||||||||
| 718 | |||||||||||
| 719 | /// \returns \p I after propagating metadata from \p VL. | ||||||||||
| 720 | Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) { | ||||||||||
| 721 | if (VL.empty()) | ||||||||||
| 722 | return Inst; | ||||||||||
| 723 | Instruction *I0 = cast<Instruction>(VL[0]); | ||||||||||
| 724 | SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; | ||||||||||
| 725 | I0->getAllMetadataOtherThanDebugLoc(Metadata); | ||||||||||
| 726 | |||||||||||
| 727 | for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, | ||||||||||
| 728 | LLVMContext::MD_noalias, LLVMContext::MD_fpmath, | ||||||||||
| 729 | LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load, | ||||||||||
| 730 | LLVMContext::MD_access_group}) { | ||||||||||
| 731 | MDNode *MD = I0->getMetadata(Kind); | ||||||||||
| 732 | |||||||||||
| 733 | for (int J = 1, E = VL.size(); MD && J != E; ++J) { | ||||||||||
| 734 | const Instruction *IJ = cast<Instruction>(VL[J]); | ||||||||||
| 735 | MDNode *IMD = IJ->getMetadata(Kind); | ||||||||||
| 736 | switch (Kind) { | ||||||||||
| 737 | case LLVMContext::MD_tbaa: | ||||||||||
| 738 | MD = MDNode::getMostGenericTBAA(MD, IMD); | ||||||||||
| 739 | break; | ||||||||||
| 740 | case LLVMContext::MD_alias_scope: | ||||||||||
| 741 | MD = MDNode::getMostGenericAliasScope(MD, IMD); | ||||||||||
| 742 | break; | ||||||||||
| 743 | case LLVMContext::MD_fpmath: | ||||||||||
| 744 | MD = MDNode::getMostGenericFPMath(MD, IMD); | ||||||||||
| 745 | break; | ||||||||||
| 746 | case LLVMContext::MD_noalias: | ||||||||||
| 747 | case LLVMContext::MD_nontemporal: | ||||||||||
| 748 | case LLVMContext::MD_invariant_load: | ||||||||||
| 749 | MD = MDNode::intersect(MD, IMD); | ||||||||||
| 750 | break; | ||||||||||
| 751 | case LLVMContext::MD_access_group: | ||||||||||
| 752 | MD = intersectAccessGroups(Inst, IJ); | ||||||||||
| 753 | break; | ||||||||||
| 754 | default: | ||||||||||
| 755 | llvm_unreachable("unhandled metadata"); | ||||||||||
| 756 | } | ||||||||||
| 757 | } | ||||||||||
| 758 | |||||||||||
| 759 | Inst->setMetadata(Kind, MD); | ||||||||||
| 760 | } | ||||||||||
| 761 | |||||||||||
| 762 | return Inst; | ||||||||||
| 763 | } | ||||||||||
| 764 | |||||||||||
| 765 | Constant * | ||||||||||
| 766 | llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, | ||||||||||
| 767 | const InterleaveGroup<Instruction> &Group) { | ||||||||||
| 768 | // All 1's means mask is not needed. | ||||||||||
| 769 | if (Group.getNumMembers() == Group.getFactor()) | ||||||||||
| 770 | return nullptr; | ||||||||||
| 771 | |||||||||||
| 772 | // TODO: support reversed access. | ||||||||||
| 773 | assert(!Group.isReverse() && "Reversed group not supported."); | ||||||||||
| 774 | |||||||||||
| 775 | SmallVector<Constant *, 16> Mask; | ||||||||||
| 776 | for (unsigned i = 0; i < VF; i++) | ||||||||||
| 777 | for (unsigned j = 0; j < Group.getFactor(); ++j) { | ||||||||||
| 778 | unsigned HasMember = Group.getMember(j) ? 1 : 0; | ||||||||||
| 779 | Mask.push_back(Builder.getInt1(HasMember)); | ||||||||||
| 780 | } | ||||||||||
| 781 | |||||||||||
| 782 | return ConstantVector::get(Mask); | ||||||||||
| 783 | } | ||||||||||
| 784 | |||||||||||
| 785 | llvm::SmallVector<int, 16> | ||||||||||
| 786 | llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) { | ||||||||||
| 787 | SmallVector<int, 16> MaskVec; | ||||||||||
| 788 | for (unsigned i = 0; i < VF; i++) | ||||||||||
| 789 | for (unsigned j = 0; j < ReplicationFactor; j++) | ||||||||||
| 790 | MaskVec.push_back(i); | ||||||||||
| 791 | |||||||||||
| 792 | return MaskVec; | ||||||||||
| 793 | } | ||||||||||
| 794 | |||||||||||
| 795 | llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF, | ||||||||||
| 796 | unsigned NumVecs) { | ||||||||||
| 797 | SmallVector<int, 16> Mask; | ||||||||||
| 798 | for (unsigned i = 0; i < VF; i++) | ||||||||||
| 799 | for (unsigned j = 0; j < NumVecs; j++) | ||||||||||
| 800 | Mask.push_back(j * VF + i); | ||||||||||
| 801 | |||||||||||
| 802 | return Mask; | ||||||||||
| 803 | } | ||||||||||
| 804 | |||||||||||
| 805 | llvm::SmallVector<int, 16> | ||||||||||
| 806 | llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) { | ||||||||||
| 807 | SmallVector<int, 16> Mask; | ||||||||||
| 808 | for (unsigned i = 0; i < VF; i++) | ||||||||||
| 809 | Mask.push_back(Start + i * Stride); | ||||||||||
| 810 | |||||||||||
| 811 | return Mask; | ||||||||||
| 812 | } | ||||||||||
| 813 | |||||||||||
| 814 | llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start, | ||||||||||
| 815 | unsigned NumInts, | ||||||||||
| 816 | unsigned NumUndefs) { | ||||||||||
| 817 | SmallVector<int, 16> Mask; | ||||||||||
| 818 | for (unsigned i = 0; i < NumInts; i++) | ||||||||||
| 819 | Mask.push_back(Start + i); | ||||||||||
| 820 | |||||||||||
| 821 | for (unsigned i = 0; i < NumUndefs; i++) | ||||||||||
| 822 | Mask.push_back(-1); | ||||||||||
| 823 | |||||||||||
| 824 | return Mask; | ||||||||||
| 825 | } | ||||||||||
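Concrete outputs of the four mask builders above; the helper is illustration only, and each commented value follows from the corresponding loop:

    // Illustrative sketch: the masks produced for small factors.
    static void maskBuilderSketch() {
      auto Rep    = createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/3);           // {0, 0, 1, 1, 2, 2}
      auto Inter  = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);                     // {0, 4, 1, 5, 2, 6, 3, 7}
      auto Stride = createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4);             // {1, 3, 5, 7}
      auto Seq    = createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2); // {0, 1, 2, 3, -1, -1}
      (void)Rep; (void)Inter; (void)Stride; (void)Seq;
    }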
| 826 | |||||||||||
| 827 | /// A helper function for concatenating vectors. This function concatenates two | ||||||||||
| 828 | /// vectors having the same element type. If the second vector has fewer | ||||||||||
| 829 | /// elements than the first, it is padded with undefs. | ||||||||||
| 830 | static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, | ||||||||||
| 831 | Value *V2) { | ||||||||||
| 832 | VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType()); | ||||||||||
| 833 | VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType()); | ||||||||||
| 834 | assert(VecTy1 && VecTy2 && | ||||||||||
| 835 | VecTy1->getScalarType() == VecTy2->getScalarType() && | ||||||||||
| 836 | "Expect two vectors with the same element type"); | ||||||||||
| 837 | |||||||||||
| 838 | unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements(); | ||||||||||
| 839 | unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements(); | ||||||||||
| 840 | assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); | ||||||||||
| 841 | |||||||||||
| 842 | if (NumElts1 > NumElts2) { | ||||||||||
| 843 | // Extend with UNDEFs. | ||||||||||
| 844 | V2 = Builder.CreateShuffleVector( | ||||||||||
| 845 | V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2)); | ||||||||||
| 846 | } | ||||||||||
| 847 | |||||||||||
| 848 | return Builder.CreateShuffleVector( | ||||||||||
| 849 | V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0)); | ||||||||||
| 850 | } | ||||||||||
| 851 | |||||||||||
| 852 | Value *llvm::concatenateVectors(IRBuilderBase &Builder, | ||||||||||
| 853 | ArrayRef<Value *> Vecs) { | ||||||||||
| 854 | unsigned NumVecs = Vecs.size(); | ||||||||||
| 855 | assert(NumVecs > 1 && "Should be at least two vectors"); | ||||||||||
| 856 | |||||||||||
| 857 | SmallVector<Value *, 8> ResList; | ||||||||||
| 858 | ResList.append(Vecs.begin(), Vecs.end()); | ||||||||||
| 859 | do { | ||||||||||
| 860 | SmallVector<Value *, 8> TmpList; | ||||||||||
| 861 | for (unsigned i = 0; i < NumVecs - 1; i += 2) { | ||||||||||
| 862 | Value *V0 = ResList[i], *V1 = ResList[i + 1]; | ||||||||||
| 863 | assert((V0->getType() == V1->getType() || i == NumVecs - 2) && | ||||||||||
| 864 | "Only the last vector may have a different type"); | ||||||||||
| 865 | |||||||||||
| 866 | TmpList.push_back(concatenateTwoVectors(Builder, V0, V1)); | ||||||||||
| 867 | } | ||||||||||
| 868 | |||||||||||
| 869 | // Push the last vector if the total number of vectors is odd. | ||||||||||
| 870 | if (NumVecs % 2 != 0) | ||||||||||
| 871 | TmpList.push_back(ResList[NumVecs - 1]); | ||||||||||
| 872 | |||||||||||
| 873 | ResList = TmpList; | ||||||||||
| 874 | NumVecs = ResList.size(); | ||||||||||
| 875 | } while (NumVecs > 1); | ||||||||||
| 876 | |||||||||||
| 877 | return ResList[0]; | ||||||||||
| 878 | } | ||||||||||
| 879 | |||||||||||
| 880 | bool llvm::maskIsAllZeroOrUndef(Value *Mask) { | ||||||||||
| 881 | assert(isa<VectorType>(Mask->getType()) && | ||||||||||
| 882 | isa<IntegerType>(Mask->getType()->getScalarType()) && | ||||||||||
| 883 | cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == | ||||||||||
| 884 | 1 && | ||||||||||
| 885 | "Mask must be a vector of i1"); | ||||||||||
| 886 | |||||||||||
| 887 | auto *ConstMask = dyn_cast<Constant>(Mask); | ||||||||||
| 888 | if (!ConstMask) | ||||||||||
| 889 | return false; | ||||||||||
| 890 | if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask)) | ||||||||||
| 891 | return true; | ||||||||||
| 892 | if (isa<ScalableVectorType>(ConstMask->getType())) | ||||||||||
| 893 | return false; | ||||||||||
| 894 | for (unsigned | ||||||||||
| 895 | I = 0, | ||||||||||
| 896 | E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); | ||||||||||
| 897 | I != E; ++I) { | ||||||||||
| 898 | if (auto *MaskElt = ConstMask->getAggregateElement(I)) | ||||||||||
| 899 | if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt)) | ||||||||||
| 900 | continue; | ||||||||||
| 901 | return false; | ||||||||||
| 902 | } | ||||||||||
| 903 | return true; | ||||||||||
| 904 | } | ||||||||||
| 905 | |||||||||||
| 906 | bool llvm::maskIsAllOneOrUndef(Value *Mask) { | ||||||||||
| 907 | assert(isa<VectorType>(Mask->getType()) && | ||||||||||
| 908 | isa<IntegerType>(Mask->getType()->getScalarType()) && | ||||||||||
| 909 | cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == | ||||||||||
| 910 | 1 && | ||||||||||
| 911 | "Mask must be a vector of i1"); | ||||||||||
| 912 | |||||||||||
| 913 | auto *ConstMask = dyn_cast<Constant>(Mask); | ||||||||||
| 914 | if (!ConstMask) | ||||||||||
| 915 | return false; | ||||||||||
| 916 | if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) | ||||||||||
| 917 | return true; | ||||||||||
| 918 | if (isa<ScalableVectorType>(ConstMask->getType())) | ||||||||||
| 919 | return false; | ||||||||||
| 920 | for (unsigned | ||||||||||
| 921 | I = 0, | ||||||||||
| 922 | E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); | ||||||||||
| 923 | I != E; ++I) { | ||||||||||
| 924 | if (auto *MaskElt = ConstMask->getAggregateElement(I)) | ||||||||||
| 925 | if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) | ||||||||||
| 926 | continue; | ||||||||||
| 927 | return false; | ||||||||||
| 928 | } | ||||||||||
| 929 | return true; | ||||||||||
| 930 | } | ||||||||||
| 931 | |||||||||||
| 932 | /// TODO: This is a lot like known bits, but for | ||||||||||
| 933 | /// vectors. Is there something we can common this with? | ||||||||||
| 934 | APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { | ||||||||||
| 935 | assert(isa<FixedVectorType>(Mask->getType()) && | ||||||||||
| 936 | isa<IntegerType>(Mask->getType()->getScalarType()) && | ||||||||||
| 937 | cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == | ||||||||||
| 938 | 1 && | ||||||||||
| 939 | "Mask must be a fixed width vector of i1"); | ||||||||||
| 940 | |||||||||||
| 941 | const unsigned VWidth = | ||||||||||
| 942 | cast<FixedVectorType>(Mask->getType())->getNumElements(); | ||||||||||
| 943 | APInt DemandedElts = APInt::getAllOnesValue(VWidth); | ||||||||||
| 944 | if (auto *CV = dyn_cast<ConstantVector>(Mask)) | ||||||||||
| 945 | for (unsigned i = 0; i < VWidth; i++) | ||||||||||
| 946 | if (CV->getAggregateElement(i)->isNullValue()) | ||||||||||
| 947 | DemandedElts.clearBit(i); | ||||||||||
| 948 | return DemandedElts; | ||||||||||
| 949 | } | ||||||||||
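A sketch of the constant-mask case: a fixed <4 x i1> mask <1, 0, 1, 1> yields demanded elements {0, 2, 3}, with bit 1 cleared by the loop above; the helper is illustration only:

    // Illustrative sketch: demanded elements of a constant <4 x i1> mask.
    static void demandedEltsSketch() {
      LLVMContext Ctx;
      Type *I1 = Type::getInt1Ty(Ctx);
      Constant *Elts[] = {ConstantInt::get(I1, 1), ConstantInt::get(I1, 0),
                          ConstantInt::get(I1, 1), ConstantInt::get(I1, 1)};
      Constant *Mask = ConstantVector::get(Elts);
      APInt Demanded = possiblyDemandedEltsInMask(Mask); // 0b1101
      (void)Demanded;
    }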
| 950 | |||||||||||
| 951 | bool InterleavedAccessInfo::isStrided(int Stride) { | ||||||||||
| 952 | unsigned Factor = std::abs(Stride); | ||||||||||
| 953 | return Factor >= 2 && Factor <= MaxInterleaveGroupFactor; | ||||||||||
| 954 | } | ||||||||||
| 955 | |||||||||||
| 956 | void InterleavedAccessInfo::collectConstStrideAccesses( | ||||||||||
| 957 | MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo, | ||||||||||
| 958 | const ValueToValueMap &Strides) { | ||||||||||
| 959 | auto &DL = TheLoop->getHeader()->getModule()->getDataLayout(); | ||||||||||
| 960 | |||||||||||
| 961 | // Since it's desired that the load/store instructions be maintained in | ||||||||||
| 962 | // "program order" for the interleaved access analysis, we have to visit the | ||||||||||
| 963 | // blocks in the loop in reverse postorder (i.e., in a topological order). | ||||||||||
| 964 | // Such an ordering will ensure that any load/store that may be executed | ||||||||||
| 965 | // before a second load/store will precede the second load/store in | ||||||||||
| 966 | // AccessStrideInfo. | ||||||||||
| 967 | LoopBlocksDFS DFS(TheLoop); | ||||||||||
| 968 | DFS.perform(LI); | ||||||||||
| 969 | for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) | ||||||||||
| 970 | for (auto &I : *BB) { | ||||||||||
| 971 | Value *Ptr = getLoadStorePointerOperand(&I); | ||||||||||
| 972 | if (!Ptr) | ||||||||||
| 973 | continue; | ||||||||||
| 974 | Type *ElementTy = getLoadStoreType(&I); | ||||||||||
| 975 | |||||||||||
| 976 | // We don't check wrapping here because we don't know yet if Ptr will be | ||||||||||
| 977 | // part of a full group or a group with gaps. Checking wrapping for all | ||||||||||
| 978 | // pointers (even those that end up in groups with no gaps) will be overly | ||||||||||
| 979 | // conservative. For full groups, wrapping should be ok since if we would | ||||||||||
| 980 | // wrap around the address space we would do a memory access at nullptr | ||||||||||
| 981 | // even without the transformation. The wrapping checks are therefore | ||||||||||
| 982 | // deferred until after we've formed the interleaved groups. | ||||||||||
| 983 | int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, | ||||||||||
| 984 | /*Assume=*/true, /*ShouldCheckWrap=*/false); | ||||||||||
| 985 | |||||||||||
| 986 | const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); | ||||||||||
| 987 | uint64_t Size = DL.getTypeAllocSize(ElementTy); | ||||||||||
| 988 | AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, | ||||||||||
| 989 | getLoadStoreAlignment(&I)); | ||||||||||
| 990 | } | ||||||||||
| 991 | } | ||||||||||
| 992 | |||||||||||
| 993 | // Analyze interleaved accesses and collect them into interleaved load and | ||||||||||
| 994 | // store groups. | ||||||||||
| 995 | // | ||||||||||
| 996 | // When generating code for an interleaved load group, we effectively hoist all | ||||||||||
| 997 | // loads in the group to the location of the first load in program order. When | ||||||||||
| 998 | // generating code for an interleaved store group, we sink all stores to the | ||||||||||
| 999 | // location of the last store. This code motion can change the order of load | ||||||||||
| 1000 | // and store instructions and may break dependences. | ||||||||||
| 1001 | // | ||||||||||
| 1002 | // The code generation strategy mentioned above ensures that we won't violate | ||||||||||
| 1003 | // any write-after-read (WAR) dependences. | ||||||||||
| 1004 | // | ||||||||||
| 1005 | // E.g., for the WAR dependence: a = A[i]; // (1) | ||||||||||
| 1006 | // A[i] = b; // (2) | ||||||||||
| 1007 | // | ||||||||||
| 1008 | // The store group of (2) is always inserted at or below (2), and the load | ||||||||||
| 1009 | // group of (1) is always inserted at or above (1). Thus, the instructions will | ||||||||||
| 1010 | // never be reordered. All other dependences are checked to ensure the | ||||||||||
| 1011 | // correctness of the instruction reordering. | ||||||||||
| 1012 | // | ||||||||||
| 1013 | // The algorithm visits all memory accesses in the loop in bottom-up program | ||||||||||
| 1014 | // order. Program order is established by traversing the blocks in the loop in | ||||||||||
| 1015 | // reverse postorder when collecting the accesses. | ||||||||||
| 1016 | // | ||||||||||
| 1017 | // We visit the memory accesses in bottom-up order because it can simplify the | ||||||||||
| 1018 | // construction of store groups in the presence of write-after-write (WAW) | ||||||||||
| 1019 | // dependences. | ||||||||||
| 1020 | // | ||||||||||
| 1021 | // E.g., for the WAW dependence: A[i] = a; // (1) | ||||||||||
| 1022 | // A[i] = b; // (2) | ||||||||||
| 1023 | // A[i + 1] = c; // (3) | ||||||||||
| 1024 | // | ||||||||||
| 1025 | // We will first create a store group with (3) and (2). (1) can't be added to | ||||||||||
| 1026 | // this group because it and (2) are dependent. However, (1) can be grouped | ||||||||||
| 1027 | // with other accesses that may precede it in program order. Note that a | ||||||||||
| 1028 | // bottom-up order does not imply that WAW dependences should not be checked. | ||||||||||
| 1029 | void InterleavedAccessInfo::analyzeInterleaving( | ||||||||||
| 1030 | bool EnablePredicatedInterleavedMemAccesses) { | ||||||||||
| 1031 | LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); | ||||||||||
| 1032 | const ValueToValueMap &Strides = LAI->getSymbolicStrides(); | ||||||||||
| 1033 | |||||||||||
| 1034 | // Holds all accesses with a constant stride. | ||||||||||
| 1035 | MapVector<Instruction *, StrideDescriptor> AccessStrideInfo; | ||||||||||
| 1036 | collectConstStrideAccesses(AccessStrideInfo, Strides); | ||||||||||
| 1037 | |||||||||||
| 1038 | if (AccessStrideInfo.empty()) | ||||||||||
| 1039 | return; | ||||||||||
| 1040 | |||||||||||
| 1041 | // Collect the dependences in the loop. | ||||||||||
| 1042 | collectDependences(); | ||||||||||
| 1043 | |||||||||||
| 1044 | // Holds all interleaved store groups temporarily. | ||||||||||
| 1045 | SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups; | ||||||||||
| 1046 | // Holds all interleaved load groups temporarily. | ||||||||||
| 1047 | SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups; | ||||||||||
| 1048 | |||||||||||
| 1049 | // Search in bottom-up program order for pairs of accesses (A and B) that can | ||||||||||
| 1050 | // form interleaved load or store groups. In the algorithm below, access A | ||||||||||
| 1051 | // precedes access B in program order. We initialize a group for B in the | ||||||||||
| 1052 | // outer loop of the algorithm, and then in the inner loop, we attempt to | ||||||||||
| 1053 | // insert each A into B's group if: | ||||||||||
| 1054 | // | ||||||||||
| 1055 | // 1. A and B have the same stride, | ||||||||||
| 1056 | // 2. A and B have the same memory object size, and | ||||||||||
| 1057 | // 3. A belongs in B's group according to its distance from B. | ||||||||||
| 1058 | // | ||||||||||
| 1059 | // Special care is taken to ensure group formation will not break any | ||||||||||
| 1060 | // dependences. | ||||||||||
| 1061 | for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend(); | ||||||||||
| 1062 | BI != E; ++BI) { | ||||||||||
| 1063 | Instruction *B = BI->first; | ||||||||||
| 1064 | StrideDescriptor DesB = BI->second; | ||||||||||
| 1065 | |||||||||||
| 1066 | // Initialize a group for B if it has an allowable stride. Even if we don't | ||||||||||
| 1067 | // create a group for B, we continue with the bottom-up algorithm to ensure | ||||||||||
| 1068 | // we don't break any of B's dependences. | ||||||||||
| 1069 | InterleaveGroup<Instruction> *Group = nullptr; | ||||||||||
| 1070 | if (isStrided(DesB.Stride) && | ||||||||||
| 1071 | (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) { | ||||||||||
| 1072 | Group = getInterleaveGroup(B); | ||||||||||
| 1073 | if (!Group) { | ||||||||||
| 1074 | LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B | ||||||||||
| 1075 | << '\n'); | ||||||||||
| 1076 | Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment); | ||||||||||
| 1077 | } | ||||||||||
| 1078 | if (B->mayWriteToMemory()) | ||||||||||
| 1079 | StoreGroups.insert(Group); | ||||||||||
| 1080 | else | ||||||||||
| 1081 | LoadGroups.insert(Group); | ||||||||||
| 1082 | } | ||||||||||
| 1083 | |||||||||||
| 1084 | for (auto AI = std::next(BI); AI != E; ++AI) { | ||||||||||
| 1085 | Instruction *A = AI->first; | ||||||||||
| 1086 | StrideDescriptor DesA = AI->second; | ||||||||||
| 1087 | |||||||||||
| 1088 | // Our code motion strategy implies that we can't have dependences | ||||||||||
| 1089 | // between accesses in an interleaved group and other accesses located | ||||||||||
| 1090 | // between the first and last member of the group. Note that this also | ||||||||||
| 1091 | // means that a group can't have more than one member at a given offset. | ||||||||||
| 1092 | // The accesses in a group can have dependences with other accesses, but | ||||||||||
| 1093 | // we must ensure we don't extend the boundaries of the group such that | ||||||||||
| 1094 | // we encompass those dependent accesses. | ||||||||||
| 1095 | // | ||||||||||
| 1096 | // For example, assume we have the sequence of accesses shown below in a | ||||||||||
| 1097 | // stride-2 loop: | ||||||||||
| 1098 | // | ||||||||||
| 1099 | // (1, 2) is a group | A[i] = a; // (1) | ||||||||||
| 1100 | // | A[i-1] = b; // (2) | | ||||||||||
| 1101 | // A[i-3] = c; // (3) | ||||||||||
| 1102 | // A[i] = d; // (4) | (2, 4) is not a group | ||||||||||
| 1103 | // | ||||||||||
| 1104 | // Because accesses (2) and (3) are dependent, we can group (2) with (1) | ||||||||||
| 1105 | // but not with (4). If we did, the dependent access (3) would be within | ||||||||||
| 1106 | // the boundaries of the (2, 4) group. | ||||||||||
| 1107 | if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) { | ||||||||||
| 1108 | // If a dependence exists and A is already in a group, we know that A | ||||||||||
| 1109 | // must be a store since A precedes B and WAR dependences are allowed. | ||||||||||
| 1110 | // Thus, A would be sunk below B. We release A's group to prevent this | ||||||||||
| 1111 | // illegal code motion. A will then be free to form another group with | ||||||||||
| 1112 | // instructions that precede it. | ||||||||||
| 1113 | if (isInterleaved(A)) { | ||||||||||
| 1114 | InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A); | ||||||||||
| 1115 | |||||||||||
| 1116 | LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to " | ||||||||||
| 1117 | "dependence between " << *A << " and " << *B << '\n'); | ||||||||||
| 1118 | |||||||||||
| 1119 | StoreGroups.remove(StoreGroup); | ||||||||||
| 1120 | releaseGroup(StoreGroup); | ||||||||||
| 1121 | } | ||||||||||
| 1122 | |||||||||||
| 1123 | // If a dependence exists and A is not already in a group (or it was | ||||||||||
| 1124 | // and we just released it), B might be hoisted above A (if B is a | ||||||||||
| 1125 | // load) or another store might be sunk below A (if B is a store). In | ||||||||||
| 1126 | // either case, we can't add additional instructions to B's group. B | ||||||||||
| 1127 | // will only form a group with instructions that it precedes. | ||||||||||
| 1128 | break; | ||||||||||
| 1129 | } | ||||||||||
| 1130 | |||||||||||
| 1131 | // At this point, we've checked for illegal code motion. If either A or B | ||||||||||
| 1132 | // isn't strided, there's nothing left to do. | ||||||||||
| 1133 | if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride)) | ||||||||||
| 1134 | continue; | ||||||||||
| 1135 | |||||||||||
| 1136 | // Ignore A if it's already in a group or isn't the same kind of memory | ||||||||||
| 1137 | // operation as B. | ||||||||||
| 1138 | // Note that mayReadFromMemory() isn't mutually exclusive with | ||||||||||
| 1139 | // mayWriteToMemory() in the case of atomic loads. We shouldn't see those | ||||||||||
| 1140 | // here; canVectorizeMemory() should have returned false - except for the | ||||||||||
| 1141 | // case where we asked for optimization remarks. | ||||||||||
| 1142 | if (isInterleaved(A) || | ||||||||||
| 1143 | (A->mayReadFromMemory() != B->mayReadFromMemory()) || | ||||||||||
| 1144 | (A->mayWriteToMemory() != B->mayWriteToMemory())) | ||||||||||
| 1145 | continue; | ||||||||||
| 1146 | |||||||||||
| 1147 | // Check rules 1 and 2. Ignore A if its stride or size is different from | ||||||||||
| 1148 | // that of B. | ||||||||||
| 1149 | if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size) | ||||||||||
| 1150 | continue; | ||||||||||
| 1151 | |||||||||||
| 1152 | // Ignore A if the memory object of A and B don't belong to the same | ||||||||||
| 1153 | // address space | ||||||||||
| 1154 | if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B)) | ||||||||||
| 1155 | continue; | ||||||||||
| 1156 | |||||||||||
| 1157 | // Calculate the distance from A to B. | ||||||||||
| 1158 | const SCEVConstant *DistToB = dyn_cast<SCEVConstant>( | ||||||||||
| 1159 | PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev)); | ||||||||||
| 1160 | if (!DistToB) | ||||||||||
| 1161 | continue; | ||||||||||
| 1162 | int64_t DistanceToB = DistToB->getAPInt().getSExtValue(); | ||||||||||
| 1163 | |||||||||||
| 1164 | // Check rule 3. Ignore A if its distance to B is not a multiple of the | ||||||||||
| 1165 | // size. | ||||||||||
| 1166 | if (DistanceToB % static_cast<int64_t>(DesB.Size)) | ||||||||||
| 1167 | continue; | ||||||||||
| 1168 | |||||||||||
| 1169 | // All members of a predicated interleave-group must have the same predicate, | ||||||||||
| 1170 | // and currently must reside in the same BB. | ||||||||||
| 1171 | BasicBlock *BlockA = A->getParent(); | ||||||||||
| 1172 | BasicBlock *BlockB = B->getParent(); | ||||||||||
| 1173 | if ((isPredicated(BlockA) || isPredicated(BlockB)) && | ||||||||||
| 1174 | (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB)) | ||||||||||
| 1175 | continue; | ||||||||||
| 1176 | |||||||||||
| 1177 | // The index of A is the index of B plus A's distance to B in multiples | ||||||||||
| 1178 | // of the size. | ||||||||||
| 1179 | int IndexA = | ||||||||||
| 1180 | Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size); | ||||||||||
| 1181 | |||||||||||
| 1182 | // Try to insert A into B's group. | ||||||||||
| 1183 | if (Group->insertMember(A, IndexA, DesA.Alignment)) { | ||||||||||
| 1184 | LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' | ||||||||||
| 1185 | << " into the interleave group with" << *B | ||||||||||
| 1186 | << '\n'); | ||||||||||
| 1187 | InterleaveGroupMap[A] = Group; | ||||||||||
| 1188 | |||||||||||
| 1189 | // Set the first load in program order as the insert position. | ||||||||||
| 1190 | if (A->mayReadFromMemory()) | ||||||||||
| 1191 | Group->setInsertPos(A); | ||||||||||
| 1192 | } | ||||||||||
| 1193 | } // Iteration over A accesses. | ||||||||||
| 1194 | } // Iteration over B accesses. | ||||||||||
| 1195 | |||||||||||
| 1196 | // Remove interleaved store groups with gaps. | ||||||||||
| 1197 | for (auto *Group : StoreGroups) | ||||||||||
| 1198 | if (Group->getNumMembers() != Group->getFactor()) { | ||||||||||
| 1199 | LLVM_DEBUG( | ||||||||||
| 1200 | dbgs() << "LV: Invalidate candidate interleaved store group due " | ||||||||||
| 1201 | "to gaps.\n"); | ||||||||||
| 1202 | releaseGroup(Group); | ||||||||||
| 1203 | } | ||||||||||
| 1204 | // Remove interleaved groups with gaps (currently only loads) whose memory | ||||||||||
| 1205 | // accesses may wrap around. We have to revisit the getPtrStride analysis, | ||||||||||
| 1206 | // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does | ||||||||||
| 1207 | // not check wrapping (see documentation there). | ||||||||||
| 1208 | // FORNOW we use Assume=false; | ||||||||||
| 1209 | // TODO: Change to Assume=true but making sure we don't exceed the threshold | ||||||||||
| 1210 | // of runtime SCEV assumptions checks (thereby potentially failing to | ||||||||||
| 1211 | // vectorize altogether). | ||||||||||
| 1212 | // Additional optional optimizations: | ||||||||||
| 1213 | // TODO: If we are peeling the loop and we know that the first pointer doesn't | ||||||||||
| 1214 | // wrap then we can deduce that all pointers in the group don't wrap. | ||||||||||
| 1215 | // This means that we can forcefully peel the loop in order to only have to | ||||||||||
| 1216 | // check the first pointer for no-wrap. When we'll change to use Assume=true | ||||||||||
| 1217 | // we'll only need at most one runtime check per interleaved group. | ||||||||||
| 1218 | for (auto *Group : LoadGroups) { | ||||||||||
| 1219 | // Case 1: A full group. We can skip the checks; for full groups, if the wide | ||||||||||
| 1220 | // load would wrap around the address space we would do a memory access at | ||||||||||
| 1221 | // nullptr even without the transformation. | ||||||||||
| 1222 | if (Group->getNumMembers() == Group->getFactor()) | ||||||||||
| 1223 | continue; | ||||||||||
| 1224 | |||||||||||
| 1225 | // Case 2: If first and last members of the group don't wrap this implies | ||||||||||
| 1226 | // that all the pointers in the group don't wrap. | ||||||||||
| 1227 | // So we check only group member 0 (which is always guaranteed to exist), | ||||||||||
| 1228 | // and group member Factor - 1; If the latter doesn't exist we rely on | ||||||||||
| 1229 | // peeling (if it is a non-reversed access -- see Case 3). | ||||||||||
| 1230 | Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0)); | ||||||||||
| 1231 | if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, | ||||||||||
| 1232 | /*ShouldCheckWrap=*/true)) { | ||||||||||
| 1233 | LLVM_DEBUG( | ||||||||||
| 1234 | dbgs() << "LV: Invalidate candidate interleaved group due to " | ||||||||||
| 1235 | "first group member potentially pointer-wrapping.\n"); | ||||||||||
| 1236 | releaseGroup(Group); | ||||||||||
| 1237 | continue; | ||||||||||
| 1238 | } | ||||||||||
| 1239 | Instruction *LastMember = Group->getMember(Group->getFactor() - 1); | ||||||||||
| 1240 | if (LastMember) { | ||||||||||
| 1241 | Value *LastMemberPtr = getLoadStorePointerOperand(LastMember); | ||||||||||
| 1242 | if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, | ||||||||||
| 1243 | /*ShouldCheckWrap=*/true)) { | ||||||||||
| 1244 | LLVM_DEBUG( | ||||||||||
| 1245 | dbgs() << "LV: Invalidate candidate interleaved group due to " | ||||||||||
| 1246 | "last group member potentially pointer-wrapping.\n"); | ||||||||||
| 1247 | releaseGroup(Group); | ||||||||||
| 1248 | } | ||||||||||
| 1249 | } else { | ||||||||||
| 1250 | // Case 3: A non-reversed interleaved load group with gaps: We need | ||||||||||
| 1251 | // to execute at least one scalar epilogue iteration. This will ensure | ||||||||||
| 1252 | // we don't speculatively access memory out-of-bounds. We only need | ||||||||||
| 1253 | // to look for a member at index factor - 1, since every group must have | ||||||||||
| 1254 | // a member at index zero. | ||||||||||
| 1255 | if (Group->isReverse()) { | ||||||||||
| 1256 | LLVM_DEBUG( | ||||||||||
| 1257 | dbgs() << "LV: Invalidate candidate interleaved group due to " | ||||||||||
| 1258 | "a reverse access with gaps.\n"); | ||||||||||
| 1259 | releaseGroup(Group); | ||||||||||
| 1260 | continue; | ||||||||||
| 1261 | } | ||||||||||
| 1262 | LLVM_DEBUG( | ||||||||||
| 1263 | dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); | ||||||||||
| 1264 | RequiresScalarEpilogue = true; | ||||||||||
| 1265 | } | ||||||||||
| 1266 | } | ||||||||||
| 1267 | } | ||||||||||
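To make the grouping concrete, here is a minimal sketch (illustrative only, not taken from this file) of a loop whose accesses the analysis above would recognize as two factor-2 interleave groups, one for the loads and one for the stores:

    // Hypothetical example: A[2*i]/A[2*i+1] form a factor-2 load group and
    // B[2*i]/B[2*i+1] form a factor-2 store group, each with stride 2 and no gaps.
    void complexToSumDiff(const float *A, float *B, int N) {
      for (int i = 0; i < N; ++i) {
        float Re = A[2 * i];      // member 0 of the load group
        float Im = A[2 * i + 1];  // member 1 of the load group
        B[2 * i] = Re + Im;       // member 0 of the store group
        B[2 * i + 1] = Re - Im;   // member 1 of the store group
      }
    }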
| 1268 | |||||||||||
| 1269 | void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() { | ||||||||||
| 1270 | // If no group had triggered the requirement to create an epilogue loop, | ||||||||||
| 1271 | // there is nothing to do. | ||||||||||
| 1272 | if (!requiresScalarEpilogue()) | ||||||||||
| 1273 | return; | ||||||||||
| 1274 | |||||||||||
| 1275 | bool ReleasedGroup = false; | ||||||||||
| 1276 | // Release groups requiring scalar epilogues. Note that this also removes them | ||||||||||
| 1277 | // from InterleaveGroups. | ||||||||||
| 1278 | for (auto *Group : make_early_inc_range(InterleaveGroups)) { | ||||||||||
| 1279 | if (!Group->requiresScalarEpilogue()) | ||||||||||
| 1280 | continue; | ||||||||||
| 1281 | LLVM_DEBUG( | ||||||||||
| 1282 | dbgs() | ||||||||||
| 1283 | << "LV: Invalidate candidate interleaved group due to gaps that " | ||||||||||
| 1284 | "require a scalar epilogue (not allowed under optsize) and cannot " | ||||||||||
| 1285 | "be masked (not enabled). \n"); | ||||||||||
| 1286 | releaseGroup(Group); | ||||||||||
| 1287 | ReleasedGroup = true; | ||||||||||
| 1288 | } | ||||||||||
| 1289 | assert(ReleasedGroup && "At least one group must be invalidated, as a " | ||||||||||
| 1290 | "scalar epilogue was required"); | ||||||||||
| 1291 | (void)ReleasedGroup; | ||||||||||
| 1292 | RequiresScalarEpilogue = false; | ||||||||||
| 1293 | } | ||||||||||
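A hedged sketch of the gap case guarded against above (illustrative source, not from this file): a factor-3 load group with no member at index 1 requires a scalar epilogue iteration, and is therefore released when an epilogue is not allowed (e.g. under optsize) and masking is not enabled.

    // Hypothetical example: only offsets 0 and 2 of each stride-3 chunk are
    // read, so the widened load of the last chunk could run past the buffer
    // unless at least one iteration is left to a scalar epilogue.
    float sumWithGap(const float *A, int N) {
      float Sum = 0.0f;
      for (int i = 0; i < N; ++i)
        Sum += A[3 * i] + A[3 * i + 2]; // factor-3 group, index 1 missing
      return Sum;
    }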
| 1294 | |||||||||||
| 1295 | template <typename InstT> | ||||||||||
| 1296 | void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const { | ||||||||||
| 1297 | llvm_unreachable("addMetadata can only be used for Instruction"); | ||||||||||
| 1298 | } | ||||||||||
| 1299 | |||||||||||
| 1300 | namespace llvm { | ||||||||||
| 1301 | template <> | ||||||||||
| 1302 | void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const { | ||||||||||
| 1303 | SmallVector<Value *, 4> VL; | ||||||||||
| 1304 | std::transform(Members.begin(), Members.end(), std::back_inserter(VL), | ||||||||||
| 1305 | [](std::pair<int, Instruction *> p) { return p.second; }); | ||||||||||
| 1306 | propagateMetadata(NewInst, VL); | ||||||||||
| 1307 | } | ||||||||||
| 1308 | } | ||||||||||
| 1309 | |||||||||||
| 1310 | std::string VFABI::mangleTLIVectorName(StringRef VectorName, | ||||||||||
| 1311 | StringRef ScalarName, unsigned numArgs, | ||||||||||
| 1312 | ElementCount VF) { | ||||||||||
| 1313 | SmallString<256> Buffer; | ||||||||||
| 1314 | llvm::raw_svector_ostream Out(Buffer); | ||||||||||
| 1315 | Out << "_ZGV" << VFABI::_LLVM_ << "N"; | ||||||||||
| 1316 | if (VF.isScalable()) | ||||||||||
| 1317 | Out << 'x'; | ||||||||||
| 1318 | else | ||||||||||
| 1319 | Out << VF.getFixedValue(); | ||||||||||
| 1320 | for (unsigned I = 0; I < numArgs; ++I) | ||||||||||
| 1321 | Out << "v"; | ||||||||||
| 1322 | Out << "_" << ScalarName << "(" << VectorName << ")"; | ||||||||||
| 1323 | return std::string(Out.str()); | ||||||||||
| 1324 | } | ||||||||||
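A small usage sketch of the mangler above; the function names are hypothetical, but the output follows directly from the stream writes in the implementation:

    // "_ZGV" + "_LLVM_" + "N" + <vlen or 'x'> + one 'v' per arg + "_" + scalar + "(" + vector + ")"
    std::string Fixed = VFABI::mangleTLIVectorName(
        "vector_sinf", "sinf", /*numArgs=*/1, ElementCount::getFixed(4));
    // Fixed == "_ZGV_LLVM_N4v_sinf(vector_sinf)"
    std::string Scalable = VFABI::mangleTLIVectorName(
        "vector_sinf", "sinf", /*numArgs=*/1, ElementCount::getScalable(4));
    // Scalable == "_ZGV_LLVM_Nxv_sinf(vector_sinf)"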
| 1325 | |||||||||||
| 1326 | void VFABI::getVectorVariantNames( | ||||||||||
| 1327 | const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) { | ||||||||||
| 1328 | const StringRef S = | ||||||||||
| 1329 | CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName) | ||||||||||
| 1330 | .getValueAsString(); | ||||||||||
| 1331 | if (S.empty()) | ||||||||||
| 1332 | return; | ||||||||||
| 1333 | |||||||||||
| 1334 | SmallVector<StringRef, 8> ListAttr; | ||||||||||
| 1335 | S.split(ListAttr, ","); | ||||||||||
| 1336 | |||||||||||
| 1337 | for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { | ||||||||||
| 1338 | #ifndef NDEBUG | ||||||||||
| 1339 | LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); | ||||||||||
| 1340 | Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule())); | ||||||||||
| 1341 | assert(Info.hasValue() && "Invalid name for a VFABI variant."); | ||||||||||
| 1342 | assert(CI.getModule()->getFunction(Info.getValue().VectorName) && | ||||||||||
| 1343 | "Vector function is missing."); | ||||||||||
| 1344 | #endif | ||||||||||
| 1345 | VariantMappings.push_back(std::string(S)); | ||||||||||
| 1346 | } | ||||||||||
| 1347 | } | ||||||||||
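A hedged caller-side sketch (fragment, not a full translation unit): CI is any call instruction; if it carries the "vector-function-abi-variant" attribute, the comma-separated mangled names end up in Mappings, otherwise the vector simply stays empty.

    SmallVector<std::string, 8> Mappings;
    VFABI::getVectorVariantNames(CI, Mappings); // CI: some CallInst &
    for (const std::string &MangledName : Mappings)
      errs() << "VFABI variant: " << MangledName << "\n";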
| 1348 | |||||||||||
| 1349 | bool VFShape::hasValidParameterList() const { | ||||||||||
| 1350 | for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams; | ||||||||||
| 1351 | ++Pos) { | ||||||||||
| 1352 | assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list."); | ||||||||||
| 1353 | |||||||||||
| 1354 | switch (Parameters[Pos].ParamKind) { | ||||||||||
| 1355 | default: // Nothing to check. | ||||||||||
| 1356 | break; | ||||||||||
| 1357 | case VFParamKind::OMP_Linear: | ||||||||||
| 1358 | case VFParamKind::OMP_LinearRef: | ||||||||||
| 1359 | case VFParamKind::OMP_LinearVal: | ||||||||||
| 1360 | case VFParamKind::OMP_LinearUVal: | ||||||||||
| 1361 | // Compile time linear steps must be non-zero. | ||||||||||
| 1362 | if (Parameters[Pos].LinearStepOrPos == 0) | ||||||||||
| 1363 | return false; | ||||||||||
| 1364 | break; | ||||||||||
| 1365 | case VFParamKind::OMP_LinearPos: | ||||||||||
| 1366 | case VFParamKind::OMP_LinearRefPos: | ||||||||||
| 1367 | case VFParamKind::OMP_LinearValPos: | ||||||||||
| 1368 | case VFParamKind::OMP_LinearUValPos: | ||||||||||
| 1369 | // The runtime linear step must be referring to some other | ||||||||||
| 1370 | // parameters in the signature. | ||||||||||
| 1371 | if (Parameters[Pos].LinearStepOrPos >= int(NumParams)) | ||||||||||
| 1372 | return false; | ||||||||||
| 1373 | // The linear step parameter must be marked as uniform. | ||||||||||
| 1374 | if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind != | ||||||||||
| 1375 | VFParamKind::OMP_Uniform) | ||||||||||
| 1376 | return false; | ||||||||||
| 1377 | // The linear step parameter can't point at itself. | ||||||||||
| 1378 | if (Parameters[Pos].LinearStepOrPos == int(Pos)) | ||||||||||
| 1379 | return false; | ||||||||||
| 1380 | break; | ||||||||||
| 1381 | case VFParamKind::GlobalPredicate: | ||||||||||
| 1382 | // The global predicate must be unique. It can be placed anywhere in the | ||||||||||
| 1383 | // signature. | ||||||||||
| 1384 | for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos) | ||||||||||
| 1385 | if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate) | ||||||||||
| 1386 | return false; | ||||||||||
| 1387 | break; | ||||||||||
| 1388 | } | ||||||||||
| 1389 | } | ||||||||||
| 1390 | return true; | ||||||||||
| 1391 | } |
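An illustrative (not authoritative) use of the rules checked above: a runtime linear step such as OMP_LinearPos must name another parameter, and that parameter must be OMP_Uniform. CI is assumed to be a CallInst with at least two arguments.

    VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
                                 /*HasGlobalPred=*/false);
    Shape.updateParam({/*ParamPos=*/1, VFParamKind::OMP_Uniform});  // the step
    Shape.updateParam({/*ParamPos=*/0, VFParamKind::OMP_LinearPos,
                       /*LinearStepOrPos=*/1});                     // linear in param 1
    assert(Shape.hasValidParameterList());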
| 1 | // -*- C++ -*- |
| 2 | //===----------------------------------------------------------------------===// |
| 3 | // |
| 4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | // See https://llvm.org/LICENSE.txt for license information. |
| 6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef _LIBCPP___ITERATOR_REVERSE_ITERATOR_H |
| 11 | #define _LIBCPP___ITERATOR_REVERSE_ITERATOR_H |
| 12 | |
| 13 | #include <__config> |
| 14 | #include <__iterator/iterator.h> |
| 15 | #include <__iterator/iterator_traits.h> |
| 16 | #include <__memory/addressof.h> |
| 17 | #include <type_traits> |
| 18 | |
| 19 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 20 | #pragma GCC system_header |
| 21 | #endif |
| 22 | |
| 23 | _LIBCPP_PUSH_MACROS |
| 24 | #include <__undef_macros> |
| 25 | |
| 26 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 27 | |
| 28 | template <class _Tp, class = void> |
| 29 | struct __is_stashing_iterator : false_type {}; |
| 30 | |
| 31 | template <class _Tp> |
| 32 | struct __is_stashing_iterator<_Tp, typename __void_t<typename _Tp::__stashing_iterator_tag>::type> |
| 33 | : true_type {}; |
| 34 | |
| 35 | _LIBCPP_SUPPRESS_DEPRECATED_PUSH |
| 36 | template <class _Iter> |
| 37 | class _LIBCPP_TEMPLATE_VIS reverse_iterator |
| 38 | #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) |
| 39 | : public iterator<typename iterator_traits<_Iter>::iterator_category, |
| 40 | typename iterator_traits<_Iter>::value_type, |
| 41 | typename iterator_traits<_Iter>::difference_type, |
| 42 | typename iterator_traits<_Iter>::pointer, |
| 43 | typename iterator_traits<_Iter>::reference> |
| 44 | #endif |
| 45 | { |
| 46 | _LIBCPP_SUPPRESS_DEPRECATED_POP |
| 47 | private: |
| 48 | #ifndef _LIBCPP_ABI_NO_ITERATOR_BASES |
| 49 | _Iter __t; // no longer used as of LWG #2360, not removed due to ABI break |
| 50 | #endif |
| 51 | |
| 52 | static_assert(!__is_stashing_iterator<_Iter>::value, |
| 53 | "The specified iterator type cannot be used with reverse_iterator; " |
| 54 | "Using stashing iterators with reverse_iterator causes undefined behavior"); |
| 55 | |
| 56 | protected: |
| 57 | _Iter current; |
| 58 | public: |
| 59 | typedef _Iter iterator_type; |
| 60 | typedef typename iterator_traits<_Iter>::difference_type difference_type; |
| 61 | typedef typename iterator_traits<_Iter>::reference reference; |
| 62 | typedef typename iterator_traits<_Iter>::pointer pointer; |
| 63 | typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, |
| 64 | random_access_iterator_tag, |
| 65 | typename iterator_traits<_Iter>::iterator_category> iterator_category; |
| 66 | typedef typename iterator_traits<_Iter>::value_type value_type; |
| 67 | |
| 68 | #if _LIBCPP_STD_VER > 17 |
| 69 | typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, |
| 70 | random_access_iterator_tag, |
| 71 | bidirectional_iterator_tag> iterator_concept; |
| 72 | #endif |
| 73 | |
| 74 | #ifndef _LIBCPP_ABI_NO_ITERATOR_BASES |
| 75 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 76 | reverse_iterator() : __t(), current() {} |
| 77 | |
| 78 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 79 | explicit reverse_iterator(_Iter __x) : __t(__x), current(__x) {} |
| 80 | |
| 81 | template <class _Up, class = _EnableIf< |
| 82 | !is_same<_Up, _Iter>::value && is_convertible<_Up const&, _Iter>::value |
| 83 | > > |
| 84 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 85 | reverse_iterator(const reverse_iterator<_Up>& __u) |
| 86 | : __t(__u.base()), current(__u.base()) |
| 87 | { } |
| 88 | |
| 89 | template <class _Up, class = _EnableIf< |
| 90 | !is_same<_Up, _Iter>::value && |
| 91 | is_convertible<_Up const&, _Iter>::value && |
| 92 | is_assignable<_Up const&, _Iter>::value |
| 93 | > > |
| 94 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 95 | reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { |
| 96 | __t = current = __u.base(); |
| 97 | return *this; |
| 98 | } |
| 99 | #else |
| 100 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 101 | reverse_iterator() : current() {} |
| 102 | |
| 103 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 104 | explicit reverse_iterator(_Iter __x) : current(__x) {} |
| 105 | |
| 106 | template <class _Up, class = _EnableIf< |
| 107 | !is_same<_Up, _Iter>::value && is_convertible<_Up const&, _Iter>::value |
| 108 | > > |
| 109 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 110 | reverse_iterator(const reverse_iterator<_Up>& __u) |
| 111 | : current(__u.base()) |
| 112 | { } |
| 113 | |
| 114 | template <class _Up, class = _EnableIf< |
| 115 | !is_same<_Up, _Iter>::value && |
| 116 | is_convertible<_Up const&, _Iter>::value && |
| 117 | is_assignable<_Up const&, _Iter>::value |
| 118 | > > |
| 119 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 120 | reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { |
| 121 | current = __u.base(); |
| 122 | return *this; |
| 123 | } |
| 124 | #endif |
| 125 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 126 | _Iter base() const {return current;} |
| 127 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 128 | reference operator*() const {_Iter __tmp = current; return *--__tmp;} |
| 129 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 130 | pointer operator->() const {return _VSTD::addressof(operator*());} |
| 131 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 132 | reverse_iterator& operator++() {--current; return *this;} |
| 133 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 134 | reverse_iterator operator++(int) {reverse_iterator __tmp(*this); --current; return __tmp;} |
| 135 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 136 | reverse_iterator& operator--() {++current; return *this;} |
| 137 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 138 | reverse_iterator operator--(int) {reverse_iterator __tmp(*this); ++current; return __tmp;} |
| 139 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 140 | reverse_iterator operator+ (difference_type __n) const {return reverse_iterator(current - __n);} |
| 141 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 142 | reverse_iterator& operator+=(difference_type __n) {current -= __n; return *this;} |
| 143 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 144 | reverse_iterator operator- (difference_type __n) const {return reverse_iterator(current + __n);} |
| 145 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 146 | reverse_iterator& operator-=(difference_type __n) {current += __n; return *this;} |
| 147 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 148 | reference operator[](difference_type __n) const {return *(*this + __n);} |
| 149 | }; |
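A brief usage sketch of the adaptor above (editorial example, not part of the header): a reverse_iterator built from an iterator i dereferences to the element just before i, so the last element of a range becomes the first element of its reversed view.

    #include <cassert>
    #include <iterator>
    #include <vector>
    int main() {
      std::vector<int> v{1, 2, 3};
      std::reverse_iterator<std::vector<int>::iterator> r(v.end());
      assert(*r == 3);              // operator* returns *(base() - 1)
      assert(r.base() == v.end());  // base() is left where it was
      assert(*(r + 2) == 1);        // operator+ walks toward the front
    }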
| 150 | |
| 151 | template <class _Iter1, class _Iter2> |
| 152 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 153 | bool |
| 154 | operator==(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 155 | { |
| 156 | return __x.base() == __y.base(); |
| 157 | } |
| 158 | |
| 159 | template <class _Iter1, class _Iter2> |
| 160 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 161 | bool |
| 162 | operator<(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 163 | { |
| 164 | return __x.base() > __y.base(); |
| 165 | } |
| 166 | |
| 167 | template <class _Iter1, class _Iter2> |
| 168 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 169 | bool |
| 170 | operator!=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 171 | { |
| 172 | return __x.base() != __y.base(); |
| 173 | } |
| 174 | |
| 175 | template <class _Iter1, class _Iter2> |
| 176 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 177 | bool |
| 178 | operator>(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 179 | { |
| 180 | return __x.base() < __y.base(); |
| 181 | } |
| 182 | |
| 183 | template <class _Iter1, class _Iter2> |
| 184 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 185 | bool |
| 186 | operator>=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 187 | { |
| 188 | return __x.base() <= __y.base(); |
| 189 | } |
| 190 | |
| 191 | template <class _Iter1, class _Iter2> |
| 192 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 193 | bool |
| 194 | operator<=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 195 | { |
| 196 | return __x.base() >= __y.base(); |
| 197 | } |
| 198 | |
| 199 | #ifndef _LIBCPP_CXX03_LANG |
| 200 | template <class _Iter1, class _Iter2> |
| 201 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 202 | auto |
| 203 | operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 204 | -> decltype(__y.base() - __x.base()) |
| 205 | { |
| 206 | return __y.base() - __x.base(); |
| 207 | } |
| 208 | #else |
| 209 | template <class _Iter1, class _Iter2> |
| 210 | inline _LIBCPP_INLINE_VISIBILITY |
| 211 | typename reverse_iterator<_Iter1>::difference_type |
| 212 | operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) |
| 213 | { |
| 214 | return __y.base() - __x.base(); |
| 215 | } |
| 216 | #endif |
| 217 | |
| 218 | template <class _Iter> |
| 219 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 220 | reverse_iterator<_Iter> |
| 221 | operator+(typename reverse_iterator<_Iter>::difference_type __n, const reverse_iterator<_Iter>& __x) |
| 222 | { |
| 223 | return reverse_iterator<_Iter>(__x.base() - __n); |
| 224 | } |
| 225 | |
| 226 | #if _LIBCPP_STD_VER > 11 |
| 227 | template <class _Iter> |
| 228 | inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 |
| 229 | reverse_iterator<_Iter> make_reverse_iterator(_Iter __i) |
| 230 | { |
| 231 | return reverse_iterator<_Iter>(__i); |
| 232 | } |
| 233 | #endif |
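A short usage note for the helper above (illustrative): make_reverse_iterator deduces the iterator type, so reversing a range only needs the two end points (available in C++14 and later, per the guard above).

    #include <cassert>
    #include <iterator>
    #include <vector>
    int main() {
      std::vector<int> v{1, 2, 3};
      auto First = std::make_reverse_iterator(v.end());
      auto Last  = std::make_reverse_iterator(v.begin());
      std::vector<int> Reversed(First, Last); // {3, 2, 1}
      assert(Reversed.front() == 3);
    }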
| 234 | |
| 235 | _LIBCPP_END_NAMESPACE_STD |
| 236 | |
| 237 | _LIBCPP_POP_MACROS |
| 238 | |
| 239 | #endif // _LIBCPP___ITERATOR_REVERSE_ITERATOR_H |
| 1 | // -*- C++ -*- |
| 2 | //===----------------------------------------------------------------------===// |
| 3 | // |
| 4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | // See https://llvm.org/LICENSE.txt for license information. |
| 6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef _LIBCPP___ITERATOR_WRAP_ITER_H |
| 11 | #define _LIBCPP___ITERATOR_WRAP_ITER_H |
| 12 | |
| 13 | #include <__config> |
| 14 | #include <__debug> |
| 15 | #include <__iterator/iterator_traits.h> |
| 16 | #include <__memory/pointer_traits.h> // __to_address |
| 17 | #include <type_traits> |
| 18 | |
| 19 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 20 | #pragma GCC system_header |
| 21 | #endif |
| 22 | |
| 23 | _LIBCPP_PUSH_MACROS |
| 24 | #include <__undef_macros> |
| 25 | |
| 26 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 27 | |
| 28 | template <class _Iter> |
| 29 | class __wrap_iter |
| 30 | { |
| 31 | public: |
| 32 | typedef _Iter iterator_type; |
| 33 | typedef typename iterator_traits<iterator_type>::value_type value_type; |
| 34 | typedef typename iterator_traits<iterator_type>::difference_type difference_type; |
| 35 | typedef typename iterator_traits<iterator_type>::pointer pointer; |
| 36 | typedef typename iterator_traits<iterator_type>::reference reference; |
| 37 | typedef typename iterator_traits<iterator_type>::iterator_category iterator_category; |
| 38 | #if _LIBCPP_STD_VER > 17 |
| 39 | typedef contiguous_iterator_tag iterator_concept; |
| 40 | #endif |
| 41 | |
| 42 | private: |
| 43 | iterator_type __i; |
| 44 | public: |
| 45 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter() _NOEXCEPT |
| 46 | #if _LIBCPP_STD_VER > 11 |
| 47 | : __i{} |
| 48 | #endif |
| 49 | { |
| 50 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 51 | __get_db()->__insert_i(this); |
| 52 | #endif |
| 53 | } |
| 54 | template <class _Up> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG |
| 55 | __wrap_iter(const __wrap_iter<_Up>& __u, |
| 56 | typename enable_if<is_convertible<_Up, iterator_type>::value>::type* = nullptr) _NOEXCEPT |
| 57 | : __i(__u.base()) |
| 58 | { |
| 59 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 60 | __get_db()->__iterator_copy(this, &__u); |
| 61 | #endif |
| 62 | } |
| 63 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 64 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG |
| 65 | __wrap_iter(const __wrap_iter& __x) |
| 66 | : __i(__x.base()) |
| 67 | { |
| 68 | __get_db()->__iterator_copy(this, &__x); |
| 69 | } |
| 70 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG |
| 71 | __wrap_iter& operator=(const __wrap_iter& __x) |
| 72 | { |
| 73 | if (this != &__x) |
| 74 | { |
| 75 | __get_db()->__iterator_copy(this, &__x); |
| 76 | __i = __x.__i; |
| 77 | } |
| 78 | return *this; |
| 79 | } |
| 80 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG |
| 81 | ~__wrap_iter() |
| 82 | { |
| 83 | __get_db()->__erase_i(this); |
| 84 | } |
| 85 | #endif |
| 86 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator*() const _NOEXCEPT |
| 87 | { |
| 88 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 89 | _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), |
| 90 | "Attempted to dereference a non-dereferenceable iterator"); |
| 91 | #endif |
| 92 | return *__i; |
| 93 | } |
| 94 | _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG pointer operator->() const _NOEXCEPT |
| 95 | { |
| 96 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 97 | _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), |
| 98 | "Attempted to dereference a non-dereferenceable iterator"); |
| 99 | #endif |
| 100 | return _VSTD::__to_address(__i); |
| 101 | } |
| 102 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator++() _NOEXCEPTnoexcept |
| 103 | { |
| 104 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
| 105 | _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this),((void)0) |
| 106 | "Attempted to increment a non-incrementable iterator")((void)0); |
| 107 | #endif |
| 108 | ++__i; |
| 109 | return *this; |
| 110 | } |
| 111 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator++(int) _NOEXCEPTnoexcept |
| 112 | {__wrap_iter __tmp(*this); ++(*this); return __tmp;} |
| 113 | |
| 114 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator--() _NOEXCEPTnoexcept |
| 115 | { |
| 116 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
| 117 | _LIBCPP_ASSERT(__get_const_db()->__decrementable(this),((void)0) |
| 118 | "Attempted to decrement a non-decrementable iterator")((void)0); |
| 119 | #endif |
| 120 | --__i; |
| 121 | return *this; |
| 122 | } |
| 123 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator--(int) _NOEXCEPTnoexcept |
| 124 | {__wrap_iter __tmp(*this); --(*this); return __tmp;} |
| 125 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator+ (difference_type __n) const _NOEXCEPTnoexcept |
| 126 | {__wrap_iter __w(*this); __w += __n; return __w;} |
| 127 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator+=(difference_type __n) _NOEXCEPTnoexcept |
| 128 | { |
| 129 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
| 130 | _LIBCPP_ASSERT(__get_const_db()->__addable(this, __n),((void)0) |
| 131 | "Attempted to add/subtract an iterator outside its valid range")((void)0); |
| 132 | #endif |
| 133 | __i += __n; |
| 134 | return *this; |
| 135 | } |
| 136 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter operator- (difference_type __n) const _NOEXCEPTnoexcept |
| 137 | {return *this + (-__n);} |
| 138 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter& operator-=(difference_type __n) _NOEXCEPTnoexcept |
| 139 | {*this += -__n; return *this;} |
| 140 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr reference operator[](difference_type __n) const _NOEXCEPTnoexcept |
| 141 | { |
| 142 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
| 143 | _LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n),((void)0) |
| 144 | "Attempted to subscript an iterator outside its valid range")((void)0); |
| 145 | #endif |
| 146 | return __i[__n]; |
| 147 | } |
| 148 | |
| 149 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr iterator_type base() const _NOEXCEPTnoexcept {return __i;} |
| 150 | |
| 151 | private: |
| 152 | #if _LIBCPP_DEBUG_LEVEL0 == 2 |
| 153 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter(const void* __p, iterator_type __x) : __i(__x) |
| 154 | { |
| 155 | __get_db()->__insert_ic(this, __p); |
| 156 | } |
| 157 | #else |
| 158 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr __wrap_iter(iterator_type __x) _NOEXCEPTnoexcept : __i(__x) {} |
| 159 | #endif |
| 160 | |
| 161 | template <class _Up> friend class __wrap_iter; |
| 162 | template <class _CharT, class _Traits, class _Alloc> friend class basic_string; |
| 163 | template <class _Tp, class _Alloc> friend class _LIBCPP_TEMPLATE_VIS__attribute__ ((__type_visibility__("default"))) vector; |
| 164 | template <class _Tp, size_t> friend class _LIBCPP_TEMPLATE_VIS__attribute__ ((__type_visibility__("default"))) span; |
| 165 | }; |
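A brief illustration of where this wrapper shows up in practice (assuming a libc++ configuration in which vector hands out __wrap_iter rather than a raw pointer): every operation forwards to the wrapped contiguous iterator, so user code sees ordinary random-access behaviour.

    #include <cassert>
    #include <vector>
    int main() {
      std::vector<int> v{10, 20, 30};
      std::vector<int>::iterator it = v.begin(); // a __wrap_iter here
      assert(&*it == v.data());                  // wraps the underlying pointer
      assert(*(it + 2) == 30);                   // random-access ops forward to __i
    }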
| 166 | |
| 167 | template <class _Iter1> |
| 168 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 169 | bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
| 170 | { |
| 171 | return __x.base() == __y.base(); |
| 172 | } |
| 173 | |
| 174 | template <class _Iter1, class _Iter2> |
| 175 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 176 | bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 177 | { |
| 178 | return __x.base() == __y.base(); |
| 179 | } |
| 180 | |
| 181 | template <class _Iter1> |
| 182 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 183 | bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
| 184 | { |
| 185 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 186 | _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), |
| 187 | "Attempted to compare incomparable iterators"); |
| 188 | #endif |
| 189 | return __x.base() < __y.base(); |
| 190 | } |
| 191 | |
| 192 | template <class _Iter1, class _Iter2> |
| 193 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 194 | bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 195 | { |
| 196 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 197 | _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), |
| 198 | "Attempted to compare incomparable iterators"); |
| 199 | #endif |
| 200 | return __x.base() < __y.base(); |
| 201 | } |
| 202 | |
| 203 | template <class _Iter1> |
| 204 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 205 | bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
| 206 | { |
| 207 | return !(__x == __y); |
| 208 | } |
| 209 | |
| 210 | template <class _Iter1, class _Iter2> |
| 211 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 212 | bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 213 | { |
| 214 | return !(__x == __y); |
| 215 | } |
| 216 | |
| 217 | template <class _Iter1> |
| 218 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 219 | bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
| 220 | { |
| 221 | return __y < __x; |
| 222 | } |
| 223 | |
| 224 | template <class _Iter1, class _Iter2> |
| 225 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 226 | bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 227 | { |
| 228 | return __y < __x; |
| 229 | } |
| 230 | |
| 231 | template <class _Iter1> |
| 232 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 233 | bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
| 234 | { |
| 235 | return !(__x < __y); |
| 236 | } |
| 237 | |
| 238 | template <class _Iter1, class _Iter2> |
| 239 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 240 | bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 241 | { |
| 242 | return !(__x < __y); |
| 243 | } |
| 244 | |
| 245 | template <class _Iter1> |
| 246 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 247 | bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPTnoexcept |
| 248 | { |
| 249 | return !(__y < __x); |
| 250 | } |
| 251 | |
| 252 | template <class _Iter1, class _Iter2> |
| 253 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 254 | bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 255 | { |
| 256 | return !(__y < __x); |
| 257 | } |
| 258 | |
| 259 | template <class _Iter1, class _Iter2> |
| 260 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 261 | #ifndef _LIBCPP_CXX03_LANG |
| 262 | auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 263 | -> decltype(__x.base() - __y.base()) |
| 264 | #else |
| 265 | typename __wrap_iter<_Iter1>::difference_type |
| 266 | operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPTnoexcept |
| 267 | #endif // C++03 |
| 268 | { |
| 269 | #if _LIBCPP_DEBUG_LEVEL == 2 |
| 270 | _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), |
| 271 | "Attempted to subtract incompatible iterators"); |
| 272 | #endif |
| 273 | return __x.base() - __y.base(); |
| 274 | } |
| 275 | |
| 276 | template <class _Iter1> |
| 277 | _LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__ )) _LIBCPP_CONSTEXPR_IF_NODEBUGconstexpr |
| 278 | __wrap_iter<_Iter1> operator+(typename __wrap_iter<_Iter1>::difference_type __n, __wrap_iter<_Iter1> __x) _NOEXCEPTnoexcept |
| 279 | { |
| 280 | __x += __n; |
| 281 | return __x; |
| 282 | } |
| 283 | |
| 284 | #if _LIBCPP_STD_VER <= 17 |
| 285 | template <class _It> |
| 286 | struct __is_cpp17_contiguous_iterator<__wrap_iter<_It> > : true_type {}; |
| 287 | #endif |
| 288 | |
| 289 | template <class _Iter> |
| 290 | _LIBCPP_CONSTEXPR |
| 291 | decltype(_VSTD::__to_address(declval<_Iter>())) |
| 292 | __to_address(__wrap_iter<_Iter> __w) _NOEXCEPT { |
| 293 | return _VSTD::__to_address(__w.base()); |
| 294 | } |
| 295 | |
| 296 | _LIBCPP_END_NAMESPACE_STD |
| 297 | |
| 298 | _LIBCPP_POP_MACROS |
| 299 | |
| 300 | #endif // _LIBCPP___ITERATOR_WRAP_ITER_H |
| 1 | //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines some vectorizer utilities. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_ANALYSIS_VECTORUTILS_H |
| 14 | #define LLVM_ANALYSIS_VECTORUTILS_H |
| 15 | |
| 16 | #include "llvm/ADT/MapVector.h" |
| 17 | #include "llvm/ADT/SmallVector.h" |
| 18 | #include "llvm/Analysis/LoopAccessAnalysis.h" |
| 19 | #include "llvm/Support/CheckedArithmetic.h" |
| 20 | |
| 21 | namespace llvm { |
| 22 | class TargetLibraryInfo; |
| 23 | |
| 24 | /// Describes the type of Parameters |
| 25 | enum class VFParamKind { |
| 26 | Vector, // No semantic information. |
| 27 | OMP_Linear, // declare simd linear(i) |
| 28 | OMP_LinearRef, // declare simd linear(ref(i)) |
| 29 | OMP_LinearVal, // declare simd linear(val(i)) |
| 30 | OMP_LinearUVal, // declare simd linear(uval(i)) |
| 31 | OMP_LinearPos, // declare simd linear(i:c) uniform(c) |
| 32 | OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c) |
| 33 | OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c) |
| 34 | OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c) |
| 35 | OMP_Uniform, // declare simd uniform(i) |
| 36 | GlobalPredicate, // Global logical predicate that acts on all lanes |
| 37 | // of the input and output mask concurrently. For |
| 38 | // example, it is implied by the `M` token in the |
| 39 | // Vector Function ABI mangled name. |
| 40 | Unknown |
| 41 | }; |
| 42 | |
| 43 | /// Describes the type of Instruction Set Architecture |
| 44 | enum class VFISAKind { |
| 45 | AdvancedSIMD, // AArch64 Advanced SIMD (NEON) |
| 46 | SVE, // AArch64 Scalable Vector Extension |
| 47 | SSE, // x86 SSE |
| 48 | AVX, // x86 AVX |
| 49 | AVX2, // x86 AVX2 |
| 50 | AVX512, // x86 AVX512 |
| 51 | LLVM, // LLVM internal ISA for functions that are not |
| 52 | // attached to an existing ABI via name mangling. |
| 53 | Unknown // Unknown ISA |
| 54 | }; |
| 55 | |
| 56 | /// Encapsulates information needed to describe a parameter. |
| 57 | /// |
| 58 | /// The description of the parameter is not linked directly to |
| 59 | /// OpenMP or any other vector function description. This structure |
| 60 | /// is extendible to handle other paradigms that describe vector |
| 61 | /// functions and their parameters. |
| 62 | struct VFParameter { |
| 63 | unsigned ParamPos; // Parameter Position in Scalar Function. |
| 64 | VFParamKind ParamKind; // Kind of Parameter. |
| 65 | int LinearStepOrPos = 0; // Step or Position of the Parameter. |
| 66 | Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1. |
| 67 | |
| 68 | // Comparison operator. |
| 69 | bool operator==(const VFParameter &Other) const { |
| 70 | return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) == |
| 71 | std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos, |
| 72 | Other.Alignment); |
| 73 | } |
| 74 | }; |
| 75 | |
| 76 | /// Contains the information about the kind of vectorization |
| 77 | /// available. |
| 78 | /// |
| 79 | /// This object is independent of the paradigm used to |
| 80 | /// represent vector functions. In particular, it is not attached to |
| 81 | /// any target-specific ABI. |
| 82 | struct VFShape { |
| 83 | ElementCount VF; // Vectorization factor. |
| 84 | SmallVector<VFParameter, 8> Parameters; // List of parameter information. |
| 85 | // Comparison operator. |
| 86 | bool operator==(const VFShape &Other) const { |
| 87 | return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters); |
| 88 | } |
| 89 | |
| 90 | /// Update the parameter in position P.ParamPos to P. |
| 91 | void updateParam(VFParameter P) { |
| 92 | assert(P.ParamPos < Parameters.size() && "Invalid parameter position."); |
| 93 | Parameters[P.ParamPos] = P; |
| 94 | assert(hasValidParameterList() && "Invalid parameter list"); |
| 95 | } |
| 96 | |
| 97 | // Retrieve the VFShape that can be used to map a (scalar) function to itself, |
| 98 | // with VF = 1. |
| 99 | static VFShape getScalarShape(const CallInst &CI) { |
| 100 | return VFShape::get(CI, ElementCount::getFixed(1), |
| 101 | /*HasGlobalPredicate*/ false); |
| 102 | } |
| 103 | |
| 104 | // Retrieve the basic vectorization shape of the function, where all |
| 105 | // parameters are mapped to VFParamKind::Vector with \p EC |
| 106 | // lanes. Specifies whether the function has a Global Predicate |
| 107 | // argument via \p HasGlobalPred. |
| 108 | static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) { |
| 109 | SmallVector<VFParameter, 8> Parameters; |
| 110 | for (unsigned I = 0; I < CI.arg_size(); ++I) |
| 111 | Parameters.push_back(VFParameter({I, VFParamKind::Vector})); |
| 112 | if (HasGlobalPred) |
| 113 | Parameters.push_back( |
| 114 | VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate})); |
| 115 | |
| 116 | return {EC, Parameters}; |
| 117 | } |
| 118 | /// Sanity check on the Parameters in the VFShape. |
| 119 | bool hasValidParameterList() const; |
| 120 | }; |
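A hedged sketch of the two factory functions above (CI is assumed to be some CallInst): the scalar shape maps a call onto itself with VF = 1, while get() with HasGlobalPred = true appends one GlobalPredicate parameter after the CI.arg_size() vector parameters.

    VFShape Scalar = VFShape::getScalarShape(CI);          // VF == 1, no mask
    VFShape Masked = VFShape::get(CI, ElementCount::getFixed(8),
                                  /*HasGlobalPred=*/true); // trailing mask param
    assert(Masked.Parameters.back().ParamKind == VFParamKind::GlobalPredicate);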
| 121 | |
| 122 | /// Holds the VFShape for a specific scalar to vector function mapping. |
| 123 | struct VFInfo { |
| 124 | VFShape Shape; /// Classification of the vector function. |
| 125 | std::string ScalarName; /// Scalar Function Name. |
| 126 | std::string VectorName; /// Vector Function Name associated to this VFInfo. |
| 127 | VFISAKind ISA; /// Instruction Set Architecture. |
| 128 | }; |
| 129 | |
| 130 | namespace VFABI { |
| 131 | /// LLVM Internal VFABI ISA token for vector functions. |
| 132 | static constexpr char const *_LLVM_ = "_LLVM_"; |
| 133 | /// Prefix for internal name redirection for vector function that |
| 134 | /// tells the compiler to scalarize the call using the scalar name |
| 135 | /// of the function. For example, a mangled name like |
| 136 | /// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the |
| 137 | /// vectorizer to vectorize the scalar call `foo`, and to scalarize |
| 138 | /// it once vectorization is done. |
| 139 | static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_"; |
| 140 | |
| 141 | /// Function to construct a VFInfo out of a mangled names in the |
| 142 | /// following format: |
| 143 | /// |
| 144 | /// <VFABI_name>{(<redirection>)} |
| 145 | /// |
| 146 | /// where <VFABI_name> is the name of the vector function, mangled according |
| 147 | /// to the rules described in the Vector Function ABI of the target vector |
| 148 | /// extension (or <isa> from now on). The <VFABI_name> is in the following |
| 149 | /// format: |
| 150 | /// |
| 151 | /// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] |
| 152 | /// |
| 153 | /// This method supports demangling rules for the following <isa>: |
| 154 | /// |
| 155 | /// * AArch64: https://developer.arm.com/docs/101129/latest |
| 156 | /// |
| 157 | /// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and |
| 158 | /// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt |
| 159 | /// |
| 160 | /// \param MangledName -> input string in the format |
| 161 | /// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]. |
| 162 | /// \param M -> Module used to retrieve information about the vector |
| 163 | /// function that are not possible to retrieve from the mangled |
| 164 | /// name. At the moment, this parameter is needed only to retrieve the |
| 165 | /// Vectorization Factor of scalable vector functions from their |
| 166 | /// respective IR declarations. |
| 167 | Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName, const Module &M); |
| 168 | |
| 169 | /// This routine mangles the given VectorName according to the LangRef |
| 170 | /// specification for vector-function-abi-variant attribute and is specific to |
| 171 | /// the TLI mappings. It is the responsibility of the caller to make sure that |
| 172 | /// this is only used if all parameters in the vector function are of vector type. |
| 173 | /// This returned string holds scalar-to-vector mapping: |
| 174 | /// _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>) |
| 175 | /// |
| 176 | /// where: |
| 177 | /// |
| 178 | /// <isa> = "_LLVM_" |
| 179 | /// <mask> = "N". Note: TLI does not support masked interfaces. |
| 180 | /// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor` |
| 181 | /// field of the `VecDesc` struct. If the number of lanes is scalable |
| 182 | /// then 'x' is printed instead. |
| 183 | /// <vparams> = "v", repeated numArgs times (one per parameter). |
| 184 | /// <scalarname> = the name of the scalar function. |
| 185 | /// <vectorname> = the name of the vector function. |
| 186 | std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, |
| 187 | unsigned numArgs, ElementCount VF); |
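| | // A hedged, standalone sketch of the mangled-name grammar documented above |
| | // (_ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)); it is not the |
| | // mangleTLIVectorName implementation, and the helper name plus the |
| | // "sinf"/"vsinf4" example symbols are made up for illustration. |
| | #include <string> |
| | static std::string sketchMangleTLIName(const std::string &VectorName, |
| | const std::string &ScalarName, |
| | unsigned NumArgs, unsigned VF, bool Scalable) { |
| | std::string S = "_ZGV_LLVM_N"; // <isa> = "_LLVM_", <mask> = "N" |
| | S += Scalable ? "x" : std::to_string(VF); // <vlen>, or 'x' when scalable |
| | S += std::string(NumArgs, 'v'); // <vparams>: one 'v' per parameter |
| | S += "_" + ScalarName + "(" + VectorName + ")"; |
| | return S; |
| | } |
| | // sketchMangleTLIName("vsinf4", "sinf", 1, 4, false) == "_ZGV_LLVM_N4v_sinf(vsinf4)" |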
| 188 | |
| 189 | /// Retrieve the `VFParamKind` from a string token. |
| 190 | VFParamKind getVFParamKindFromString(const StringRef Token); |
| 191 | |
| 192 | // Name of the attribute where the variant mappings are stored. |
| 193 | static constexpr char const *MappingsAttrName = "vector-function-abi-variant"; |
| 194 | |
| 195 | /// Populates a set of strings representing the Vector Function ABI variants |
| 196 | /// associated to the CallInst CI. If the CI does not contain the |
| 197 | /// vector-function-abi-variant attribute, we return without populating |
| 198 | /// VariantMappings, i.e. callers of getVectorVariantNames need not check for |
| 199 | /// the presence of the attribute (see InjectTLIMappings). |
| 200 | void getVectorVariantNames(const CallInst &CI, |
| 201 | SmallVectorImpl<std::string> &VariantMappings); |
| 202 | } // end namespace VFABI |
| 203 | |
| 204 | /// The Vector Function Database. |
| 205 | /// |
| 206 | /// Helper class used to find the vector functions associated to a |
| 207 | /// scalar CallInst. |
| 208 | class VFDatabase { |
| 209 | /// The Module of the CallInst CI. |
| 210 | const Module *M; |
| 211 | /// The CallInst instance being queried for scalar to vector mappings. |
| 212 | const CallInst &CI; |
| 213 | /// List of vector function descriptors associated with the call |
| 214 | /// instruction. |
| 215 | const SmallVector<VFInfo, 8> ScalarToVectorMappings; |
| 216 | |
| 217 | /// Retrieve the scalar-to-vector mappings associated with the rules of |
| 218 | /// the Vector Function ABI. |
| 219 | static void getVFABIMappings(const CallInst &CI, |
| 220 | SmallVectorImpl<VFInfo> &Mappings) { |
| 221 | if (!CI.getCalledFunction()) |
| 222 | return; |
| 223 | |
| 224 | const StringRef ScalarName = CI.getCalledFunction()->getName(); |
| 225 | |
| 226 | SmallVector<std::string, 8> ListOfStrings; |
| 227 | // The check for the vector-function-abi-variant attribute is done when |
| 228 | // retrieving the vector variant names here. |
| 229 | VFABI::getVectorVariantNames(CI, ListOfStrings); |
| 230 | if (ListOfStrings.empty()) |
| 231 | return; |
| 232 | for (const auto &MangledName : ListOfStrings) { |
| 233 | const Optional<VFInfo> Shape = |
| 234 | VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule())); |
| 235 | // A match is found via scalar and vector names, and also by |
| 236 | // ensuring that the variant described in the attribute has a |
| 237 | // corresponding definition or declaration of the vector |
| 238 | // function in the Module M. |
| 239 | if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) { |
| 240 | assert(CI.getModule()->getFunction(Shape.getValue().VectorName) && |
| 241 | "Vector function is missing."); |
| 242 | Mappings.push_back(Shape.getValue()); |
| 243 | } |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | public: |
| 248 | /// Retrieve all the VFInfo instances associated to the CallInst CI. |
| 249 | static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) { |
| 250 | SmallVector<VFInfo, 8> Ret; |
| 251 | |
| 252 | // Get mappings from the Vector Function ABI variants. |
| 253 | getVFABIMappings(CI, Ret); |
| 254 | |
| 255 | // Other non-VFABI variants should be retrieved here. |
| 256 | |
| 257 | return Ret; |
| 258 | } |
| 259 | |
| 260 | /// Constructor, requires a CallInst instance. |
| 261 | VFDatabase(CallInst &CI) |
| 262 | : M(CI.getModule()), CI(CI), |
| 263 | ScalarToVectorMappings(VFDatabase::getMappings(CI)) {} |
| 264 | /// \defgroup VFDatabase query interface. |
| 265 | /// |
| 266 | /// @{ |
| 267 | /// Retrieve the Function with VFShape \p Shape. |
| 268 | Function *getVectorizedFunction(const VFShape &Shape) const { |
| 269 | if (Shape == VFShape::getScalarShape(CI)) |
| 270 | return CI.getCalledFunction(); |
| 271 | |
| 272 | for (const auto &Info : ScalarToVectorMappings) |
| 273 | if (Info.Shape == Shape) |
| 274 | return M->getFunction(Info.VectorName); |
| 275 | |
| 276 | return nullptr; |
| 277 | } |
| 278 | /// @} |
| 279 | }; |
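| | // Hedged usage sketch for the VFDatabase query interface above. It assumes a |
| | // CallInst obtained elsewhere (e.g. while visiting a loop body) and the LLVM |
| | // headers on the include path; the wrapper function name is hypothetical. |
| | #include "llvm/Analysis/VectorUtils.h" |
| | static llvm::Function *sketchFindWidenedCallee(llvm::CallInst &CI, unsigned VF) { |
| | llvm::VFDatabase DB(CI); // collects the VFABI mappings attached to CI |
| | llvm::VFShape Shape = llvm::VFShape::get( |
| | CI, llvm::ElementCount::getFixed(VF), /*HasGlobalPred=*/false); |
| | return DB.getVectorizedFunction(Shape); // nullptr if no mapping matches |
| | } |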
| 280 | |
| 281 | template <typename T> class ArrayRef; |
| 282 | class DemandedBits; |
| 283 | class GetElementPtrInst; |
| 284 | template <typename InstTy> class InterleaveGroup; |
| 285 | class IRBuilderBase; |
| 286 | class Loop; |
| 287 | class ScalarEvolution; |
| 288 | class TargetTransformInfo; |
| 289 | class Type; |
| 290 | class Value; |
| 291 | |
| 292 | namespace Intrinsic { |
| 293 | typedef unsigned ID; |
| 294 | } |
| 295 | |
| 296 | /// A helper function for converting Scalar types to vector types. If |
| 297 | /// the incoming type is void, we return void. If the EC represents a |
| 298 | /// scalar, we return the scalar type. |
| 299 | inline Type *ToVectorTy(Type *Scalar, ElementCount EC) { |
| 300 | if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar()) |
| 301 | return Scalar; |
| 302 | return VectorType::get(Scalar, EC); |
| 303 | } |
| 304 | |
| 305 | inline Type *ToVectorTy(Type *Scalar, unsigned VF) { |
| 306 | return ToVectorTy(Scalar, ElementCount::getFixed(VF)); |
| 307 | } |
| 308 | |
| 309 | /// Identify if the intrinsic is trivially vectorizable. |
| 310 | /// This method returns true if the intrinsic's argument types are all scalars |
| 311 | /// for the scalar form of the intrinsic and all vectors (or scalars handled by |
| 312 | /// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic. |
| 313 | bool isTriviallyVectorizable(Intrinsic::ID ID); |
| 314 | |
| 315 | /// Identifies if the vector form of the intrinsic has a scalar operand. |
| 316 | bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx); |
| 317 | |
| 318 | /// Identifies if the vector form of the intrinsic has a scalar operand that has |
| 319 | /// an overloaded type. |
| 320 | bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, |
| 321 | unsigned ScalarOpdIdx); |
| 322 | |
| 323 | /// Returns intrinsic ID for call. |
| 324 | /// For the given call instruction, it finds the matching intrinsic and returns |
| 325 | /// its intrinsic ID; if no mapping is found, it returns not_intrinsic. |
| 326 | Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, |
| 327 | const TargetLibraryInfo *TLI); |
| 328 | |
| 329 | /// Find the operand of the GEP that should be checked for consecutive |
| 330 | /// stores. This ignores trailing indices that have no effect on the final |
| 331 | /// pointer. |
| 332 | unsigned getGEPInductionOperand(const GetElementPtrInst *Gep); |
| 333 | |
| 334 | /// If the argument is a GEP, then returns the operand identified by |
| 335 | /// getGEPInductionOperand. However, if there is some other non-loop-invariant |
| 336 | /// operand, it returns that instead. |
| 337 | Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp); |
| 338 | |
| 339 | /// If a value has only one user that is a CastInst, return it. |
| 340 | Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty); |
| 341 | |
| 342 | /// Get the stride of a pointer access in a loop. Looks for symbolic |
| 343 | /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. |
| 344 | Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp); |
| 345 | |
| 346 | /// Given a vector and an element number, see if the scalar value is |
| 347 | /// already around as a register, for example if it were inserted then extracted |
| 348 | /// from the vector. |
| 349 | Value *findScalarElement(Value *V, unsigned EltNo); |
| 350 | |
| 351 | /// If all non-negative \p Mask elements are the same value, return that value. |
| 352 | /// If all elements are negative (undefined) or \p Mask contains different |
| 353 | /// non-negative values, return -1. |
| 354 | int getSplatIndex(ArrayRef<int> Mask); |
| 355 | |
| 356 | /// Get splat value if the input is a splat vector or return nullptr. |
| 357 | /// The value may be extracted from a splat constants vector or from |
| 358 | /// a sequence of instructions that broadcast a single value into a vector. |
| 359 | Value *getSplatValue(const Value *V); |
| 360 | |
| 361 | /// Return true if each element of the vector value \p V is poisoned or equal to |
| 362 | /// every other non-poisoned element. If an index element is specified, either |
| 363 | /// every element of the vector is poisoned or the element at that index is not |
| 364 | /// poisoned and equal to every other non-poisoned element. |
| 365 | /// This may be more powerful than the related getSplatValue() because it is |
| 366 | /// not limited by finding a scalar source value to a splatted vector. |
| 367 | bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0); |
| 368 | |
| 369 | /// Replace each shuffle mask index with the scaled sequential indices for an |
| 370 | /// equivalent mask of narrowed elements. Mask elements that are less than 0 |
| 371 | /// (sentinel values) are repeated in the output mask. |
| 372 | /// |
| 373 | /// Example with Scale = 4: |
| 374 | /// <4 x i32> <3, 2, 0, -1> --> |
| 375 | /// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> |
| 376 | /// |
| 377 | /// This is the reverse process of widening shuffle mask elements, but it always |
| 378 | /// succeeds because the indexes can always be multiplied (scaled up) to map to |
| 379 | /// narrower vector elements. |
| 380 | void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, |
| 381 | SmallVectorImpl<int> &ScaledMask); |
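| | // Hedged, standalone sketch of the narrowing rule described above (it is not |
| | // the LLVM implementation): each index becomes Scale consecutive indices |
| | // starting at Index * Scale, and sentinel (negative) indices are repeated. |
| | #include <vector> |
| | static std::vector<int> sketchNarrowMask(int Scale, const std::vector<int> &Mask) { |
| | std::vector<int> Out; |
| | for (int Idx : Mask) |
| | for (int I = 0; I != Scale; ++I) |
| | Out.push_back(Idx < 0 ? Idx : Idx * Scale + I); |
| | return Out; |
| | } |
| | // sketchNarrowMask(4, {3, 2, 0, -1}) == |
| | // {12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1} |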
| 382 | |
| 383 | /// Try to transform a shuffle mask by replacing elements with the scaled index |
| 384 | /// for an equivalent mask of widened elements. If all mask elements that would |
| 385 | /// map to a wider element of the new mask are the same negative number |
| 386 | /// (sentinel value), that element of the new mask is the same value. If any |
| 387 | /// element in a given slice is negative and some other element in that slice is |
| 388 | /// not the same value, return false (partial matches with sentinel values are |
| 389 | /// not allowed). |
| 390 | /// |
| 391 | /// Example with Scale = 4: |
| 392 | /// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> --> |
| 393 | /// <4 x i32> <3, 2, 0, -1> |
| 394 | /// |
| 395 | /// This is the reverse process of narrowing shuffle mask elements if it |
| 396 | /// succeeds. This transform is not always possible because indexes may not |
| 397 | /// divide evenly (scale down) to map to wider vector elements. |
| 398 | bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask, |
| 399 | SmallVectorImpl<int> &ScaledMask); |
| 400 | |
| 401 | /// Compute a map of integer instructions to their minimum legal type |
| 402 | /// size. |
| 403 | /// |
| 404 | /// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int |
| 405 | /// type (e.g. i32) whenever arithmetic is performed on them. |
| 406 | /// |
| 407 | /// For targets with native i8 or i16 operations, usually InstCombine can shrink |
| 408 | /// the arithmetic type down again. However InstCombine refuses to create |
| 409 | /// illegal types, so for targets without i8 or i16 registers, the lengthening |
| 410 | /// and shrinking remains. |
| 411 | /// |
| 412 | /// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when |
| 413 | /// their scalar equivalents do not, so during vectorization it is important to |
| 414 | /// remove these extensions and truncations when deciding the profitability of |
| 415 | /// vectorization. |
| 416 | /// |
| 417 | /// This function analyzes the given range of instructions and determines the |
| 418 | /// minimum type size each can be converted to. It attempts to remove or |
| 419 | /// minimize type size changes across each def-use chain, so for example in the |
| 420 | /// following code: |
| 421 | /// |
| 422 | /// %1 = load i8, i8* |
| 423 | /// %2 = add i8 %1, 2 |
| 424 | /// %3 = load i16, i16* |
| 425 | /// %4 = zext i8 %2 to i32 |
| 426 | /// %5 = zext i16 %3 to i32 |
| 427 | /// %6 = add i32 %4, %5 |
| 428 | /// %7 = trunc i32 %6 to i16 |
| 429 | /// |
| 430 | /// Instruction %6 must be done at least in i16, so computeMinimumValueSizes |
| 431 | /// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}. |
| 432 | /// |
| 433 | /// If the optional TargetTransformInfo is provided, this function tries harder |
| 434 | /// to do less work by only looking at illegal types. |
| 435 | MapVector<Instruction*, uint64_t> |
| 436 | computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks, |
| 437 | DemandedBits &DB, |
| 438 | const TargetTransformInfo *TTI=nullptr); |
| 439 | |
| 440 | /// Compute the union of two access-group lists. |
| 441 | /// |
| 442 | /// If the list contains just one access group, it is returned directly. If the |
| 443 | /// list is empty, returns nullptr. |
| 444 | MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2); |
| 445 | |
| 446 | /// Compute the access-group list of access groups that @p Inst1 and @p Inst2 |
| 447 | /// are both in. If either instruction does not access memory at all, it is |
| 448 | /// considered to be in every list. |
| 449 | /// |
| 450 | /// If the list contains just one access group, it is returned directly. If the |
| 451 | /// list is empty, returns nullptr. |
| 452 | MDNode *intersectAccessGroups(const Instruction *Inst1, |
| 453 | const Instruction *Inst2); |
| 454 | |
| 455 | /// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, |
| 456 | /// MD_nontemporal, MD_access_group]. |
| 457 | /// For K in Kinds, we get the MDNode for K from each of the |
| 458 | /// elements of VL, compute their "intersection" (i.e., the most generic |
| 459 | /// metadata value that covers all of the individual values), and set I's |
| 460 | /// metadata for K equal to the intersection value. |
| 461 | /// |
| 462 | /// This function always sets a (possibly null) value for each K in Kinds. |
| 463 | Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL); |
| 464 | |
| 465 | /// Create a mask that filters the members of an interleave group where there |
| 466 | /// are gaps. |
| 467 | /// |
| 468 | /// For example, the mask for \p Group with interleave-factor 3 |
| 469 | /// and \p VF 4, that has only its first member present is: |
| 470 | /// |
| 471 | /// <1,0,0,1,0,0,1,0,0,1,0,0> |
| 472 | /// |
| 473 | /// Note: The result is a mask of 0's and 1's, as opposed to the other |
| 474 | /// create[*]Mask() utilities which create a shuffle mask (mask that |
| 475 | /// consists of indices). |
| 476 | Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, |
| 477 | const InterleaveGroup<Instruction> &Group); |
| 478 | |
| 479 | /// Create a mask with replicated elements. |
| 480 | /// |
| 481 | /// This function creates a shuffle mask for replicating each of the \p VF |
| 482 | /// elements in a vector \p ReplicationFactor times. It can be used to |
| 483 | /// transform a mask of \p VF elements into a mask of |
| 484 | /// \p VF * \p ReplicationFactor elements used by a predicated |
| 485 | /// interleaved-group of loads/stores whose Interleaved-factor == |
| 486 | /// \p ReplicationFactor. |
| 487 | /// |
| 488 | /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: |
| 489 | /// |
| 490 | /// <0,0,0,1,1,1,2,2,2,3,3,3> |
| 491 | llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor, |
| 492 | unsigned VF); |
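| | // Hedged, standalone sketch of the replication rule documented above (not the |
| | // LLVM implementation): element I of the VF-wide mask is repeated |
| | // ReplicationFactor times. |
| | #include <vector> |
| | static std::vector<int> sketchReplicatedMask(unsigned ReplicationFactor, unsigned VF) { |
| | std::vector<int> Out; |
| | for (unsigned I = 0; I < VF; ++I) |
| | Out.insert(Out.end(), ReplicationFactor, static_cast<int>(I)); |
| | return Out; |
| | } |
| | // sketchReplicatedMask(3, 4) == {0,0,0,1,1,1,2,2,2,3,3,3} |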
| 493 | |
| 494 | /// Create an interleave shuffle mask. |
| 495 | /// |
| 496 | /// This function creates a shuffle mask for interleaving \p NumVecs vectors of |
| 497 | /// vectorization factor \p VF into a single wide vector. The mask is of the |
| 498 | /// form: |
| 499 | /// |
| 500 | /// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...> |
| 501 | /// |
| 502 | /// For example, the mask for VF = 4 and NumVecs = 2 is: |
| 503 | /// |
| 504 | /// <0, 4, 1, 5, 2, 6, 3, 7>. |
| 505 | llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs); |
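| | // Hedged, standalone sketch of the interleave-mask layout documented above |
| | // (not the LLVM implementation): result lane L selects element L / NumVecs of |
| | // source vector L % NumVecs, i.e. index (L % NumVecs) * VF + (L / NumVecs). |
| | #include <vector> |
| | static std::vector<int> sketchInterleaveMask(unsigned VF, unsigned NumVecs) { |
| | std::vector<int> Out; |
| | for (unsigned I = 0; I < VF; ++I) // position within each source vector |
| | for (unsigned J = 0; J < NumVecs; ++J) // which source vector |
| | Out.push_back(static_cast<int>(J * VF + I)); |
| | return Out; |
| | } |
| | // sketchInterleaveMask(4, 2) == {0, 4, 1, 5, 2, 6, 3, 7} |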
| 506 | |
| 507 | /// Create a stride shuffle mask. |
| 508 | /// |
| 509 | /// This function creates a shuffle mask whose elements begin at \p Start and |
| 510 | /// are incremented by \p Stride. The mask can be used to deinterleave an |
| 511 | /// interleaved vector into separate vectors of vectorization factor \p VF. The |
| 512 | /// mask is of the form: |
| 513 | /// |
| 514 | /// <Start, Start + Stride, ..., Start + Stride * (VF - 1)> |
| 515 | /// |
| 516 | /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is: |
| 517 | /// |
| 518 | /// <0, 2, 4, 6> |
| 519 | llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride, |
| 520 | unsigned VF); |
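| | // Hedged, standalone sketch of the stride mask documented above, shown as a |
| | // plain loop (not the LLVM implementation). Such a mask deinterleaves one |
| | // member of an interleaved vector: Start picks the member, Stride is the |
| | // interleave factor. |
| | #include <vector> |
| | static std::vector<int> sketchStrideMask(unsigned Start, unsigned Stride, unsigned VF) { |
| | std::vector<int> Out; |
| | for (unsigned I = 0; I < VF; ++I) |
| | Out.push_back(static_cast<int>(Start + I * Stride)); |
| | return Out; |
| | } |
| | // sketchStrideMask(0, 2, 4) == {0, 2, 4, 6}; sketchStrideMask(1, 2, 4) == {1, 3, 5, 7} |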
| 521 | |
| 522 | /// Create a sequential shuffle mask. |
| 523 | /// |
| 524 | /// This function creates a shuffle mask whose elements are sequential and begin |
| 525 | /// at \p Start. The mask contains \p NumInts integers and is padded with \p |
| 526 | /// NumUndefs undef values. The mask is of the form: |
| 527 | /// |
| 528 | /// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs> |
| 529 | /// |
| 530 | /// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is: |
| 531 | /// |
| 532 | /// <0, 1, 2, 3, undef, undef, undef, undef> |
| 533 | llvm::SmallVector<int, 16> |
| 534 | createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs); |
| 535 | |
| 536 | /// Concatenate a list of vectors. |
| 537 | /// |
| 538 | /// This function generates code that concatenates the vectors in \p Vecs into a |
| 539 | /// single large vector. The number of vectors should be greater than one, and |
| 540 | /// their element types should be the same. The number of elements in the |
| 541 | /// vectors should also be the same; however, if the last vector has fewer |
| 542 | /// elements, it will be padded with undefs. |
| 543 | Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs); |
| 544 | |
| 545 | /// Given a mask vector of i1, return true if all of the elements of this |
| 546 | /// predicate mask are known to be false or undef. That is, return true if all |
| 547 | /// lanes can be assumed inactive. |
| 548 | bool maskIsAllZeroOrUndef(Value *Mask); |
| 549 | |
| 550 | /// Given a mask vector of i1, return true if all of the elements of this |
| 551 | /// predicate mask are known to be true or undef. That is, return true if all |
| 552 | /// lanes can be assumed active. |
| 553 | bool maskIsAllOneOrUndef(Value *Mask); |
| 554 | |
| 555 | /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) |
| 556 | /// for each lane which may be active. |
| 557 | APInt possiblyDemandedEltsInMask(Value *Mask); |
| 558 | |
| 559 | /// The group of interleaved loads/stores sharing the same stride and |
| 560 | /// close to each other. |
| 561 | /// |
| 562 | /// Each member in this group has an index starting from 0, and the largest |
| 563 | /// index should be less than the interleave factor, which is equal to the absolute |
| 564 | /// value of the access's stride. |
| 565 | /// |
| 566 | /// E.g. An interleaved load group of factor 4: |
| 567 | /// for (unsigned i = 0; i < 1024; i+=4) { |
| 568 | /// a = A[i]; // Member of index 0 |
| 569 | /// b = A[i+1]; // Member of index 1 |
| 570 | /// d = A[i+3]; // Member of index 3 |
| 571 | /// ... |
| 572 | /// } |
| 573 | /// |
| 574 | /// An interleaved store group of factor 4: |
| 575 | /// for (unsigned i = 0; i < 1024; i+=4) { |
| 576 | /// ... |
| 577 | /// A[i] = a; // Member of index 0 |
| 578 | /// A[i+1] = b; // Member of index 1 |
| 579 | /// A[i+2] = c; // Member of index 2 |
| 580 | /// A[i+3] = d; // Member of index 3 |
| 581 | /// } |
| 582 | /// |
| 583 | /// Note: the interleaved load group could have gaps (missing members), but |
| 584 | /// the interleaved store group doesn't allow gaps. |
| 585 | template <typename InstTy> class InterleaveGroup { |
| 586 | public: |
| 587 | InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment) |
| 588 | : Factor(Factor), Reverse(Reverse), Alignment(Alignment), |
| 589 | InsertPos(nullptr) {} |
| 590 | |
| 591 | InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment) |
| 592 | : Alignment(Alignment), InsertPos(Instr) { |
| 593 | Factor = std::abs(Stride); |
| 594 | assert(Factor > 1 && "Invalid interleave factor"); |
| 595 | |
| 596 | Reverse = Stride < 0; |
| 597 | Members[0] = Instr; |
| 598 | } |
| 599 | |
| 600 | bool isReverse() const { return Reverse; } |
| 601 | uint32_t getFactor() const { return Factor; } |
| 602 | Align getAlign() const { return Alignment; } |
| 603 | uint32_t getNumMembers() const { return Members.size(); } |
| 604 | |
| 605 | /// Try to insert a new member \p Instr with index \p Index and |
| 606 | /// alignment \p NewAlign. The index is related to the leader and it could be |
| 607 | /// negative if it is the new leader. |
| 608 | /// |
| 609 | /// \returns false if the instruction doesn't belong to the group. |
| 610 | bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) { |
| 611 | // Make sure the key fits in an int32_t. |
| 612 | Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey); |
| 613 | if (!MaybeKey) |
| 614 | return false; |
| 615 | int32_t Key = *MaybeKey; |
| 616 | |
| 617 | // Skip if the key is used for either the tombstone or empty special values. |
| 618 | if (DenseMapInfo<int32_t>::getTombstoneKey() == Key || |
| 619 | DenseMapInfo<int32_t>::getEmptyKey() == Key) |
| 620 | return false; |
| 621 | |
| 622 | // Skip if there is already a member with the same index. |
| 623 | if (Members.find(Key) != Members.end()) |
| 624 | return false; |
| 625 | |
| 626 | if (Key > LargestKey) { |
| 627 | // The largest index is always less than the interleave factor. |
| 628 | if (Index >= static_cast<int32_t>(Factor)) |
| 629 | return false; |
| 630 | |
| 631 | LargestKey = Key; |
| 632 | } else if (Key < SmallestKey) { |
| 633 | |
| 634 | // Make sure the largest index fits in an int32_t. |
| 635 | Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key); |
| 636 | if (!MaybeLargestIndex) |
| 637 | return false; |
| 638 | |
| 639 | // The largest index is always less than the interleave factor. |
| 640 | if (*MaybeLargestIndex >= static_cast<int64_t>(Factor)) |
| 641 | return false; |
| 642 | |
| 643 | SmallestKey = Key; |
| 644 | } |
| 645 | |
| 646 | // It's always safe to select the minimum alignment. |
| 647 | Alignment = std::min(Alignment, NewAlign); |
| 648 | Members[Key] = Instr; |
| 649 | return true; |
| 650 | } |
| 651 | |
| 652 | /// Get the member with the given index \p Index |
| 653 | /// |
| 654 | /// \returns nullptr if the group contains no such member. |
| 655 | InstTy *getMember(uint32_t Index) const { |
| 656 | int32_t Key = SmallestKey + Index; |
| 657 | return Members.lookup(Key); |
| 658 | } |
| 659 | |
| 660 | /// Get the index for the given member. Unlike the key in the member |
| 661 | /// map, the index starts from 0. |
| 662 | uint32_t getIndex(const InstTy *Instr) const { |
| 663 | for (auto I : Members) { |
| 664 | if (I.second == Instr) |
| 665 | return I.first - SmallestKey; |
| 666 | } |
| 667 | |
| 668 | llvm_unreachable("InterleaveGroup contains no such member"); |
| 669 | } |
| 670 | |
| 671 | InstTy *getInsertPos() const { return InsertPos; } |
| 672 | void setInsertPos(InstTy *Inst) { InsertPos = Inst; } |
| 673 | |
| 674 | /// Add metadata (e.g. alias info) from the instructions in this group to \p |
| 675 | /// NewInst. |
| 676 | /// |
| 677 | /// FIXME: this function currently does not add noalias metadata a la |
| 678 | /// addNewMetadata. To do that we need to compute the intersection of the |
| 679 | /// noalias info from all members. |
| 680 | void addMetadata(InstTy *NewInst) const; |
| 681 | |
| 682 | /// Returns true if this Group requires a scalar iteration to handle gaps. |
| 683 | bool requiresScalarEpilogue() const { |
| 684 | // If the last member of the Group exists, then a scalar epilog is not |
| 685 | // needed for this group. |
| 686 | if (getMember(getFactor() - 1)) |
| 687 | return false; |
| 688 | |
| 689 | // We have a group with gaps. It therefore cannot be a group of stores, |
| 690 | // and it can't be a reversed access, because such groups get invalidated. |
| 691 | assert(!getMember(0)->mayWriteToMemory() && |
| 692 | "Group should have been invalidated"); |
| 693 | assert(!isReverse() && "Group should have been invalidated"); |
| 694 | |
| 695 | // This is a group of loads, with gaps, and without a last member. |
| 696 | return true; |
| 697 | } |
| 698 | |
| 699 | private: |
| 700 | uint32_t Factor; // Interleave Factor. |
| 701 | bool Reverse; |
| 702 | Align Alignment; |
| 703 | DenseMap<int32_t, InstTy *> Members; |
| 704 | int32_t SmallestKey = 0; |
| 705 | int32_t LargestKey = 0; |
| 706 | |
| 707 | // To avoid breaking dependences, vectorized instructions of an interleave |
| 708 | // group should be inserted at either the first load or the last store in |
| 709 | // program order. |
| 710 | // |
| 711 | // E.g. %even = load i32 // Insert Position |
| 712 | // %add = add i32 %even // Use of %even |
| 713 | // %odd = load i32 |
| 714 | // |
| 715 | // store i32 %even |
| 716 | // %odd = add i32 // Def of %odd |
| 717 | // store i32 %odd // Insert Position |
| 718 | InstTy *InsertPos; |
| 719 | }; |
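| | // Hedged, standalone model of the key bookkeeping used by InterleaveGroup |
| | // above (std::map stands in for the DenseMap member and the strings stand in |
| | // for instructions): indices are stored relative to the first inserted leader, |
| | // SmallestKey shifts when a member is inserted before it, and getMember(Index) |
| | // looks up SmallestKey + Index. |
| | #include <cassert> |
| | #include <map> |
| | #include <string> |
| | int main() { |
| | std::map<int, std::string> Members; // key -> member |
| | int SmallestKey = 0; |
| | Members[0] = "A[i+1]"; // leader inserted first, at key 0 |
| | Members[2] = "A[i+3]"; // insertMember(.., /*Index=*/2, ..): key = 0 + 2 |
| | Members[-1] = "A[i]"; // negative index: this member becomes the new leader |
| | SmallestKey = -1; |
| | assert(Members[SmallestKey + 0] == "A[i]"); |
| | assert(Members[SmallestKey + 1] == "A[i+1]"); |
| | assert(Members.count(SmallestKey + 2) == 0); // gap at index 2 -> getMember gives nullptr |
| | assert(Members[SmallestKey + 3] == "A[i+3]"); |
| | return 0; |
| | } |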
| 720 | |
| 721 | /// Drive the analysis of interleaved memory accesses in the loop. |
| 722 | /// |
| 723 | /// Use this class to analyze interleaved accesses only when we can vectorize |
| 724 | /// a loop. Otherwise it's meaningless to do analysis as the vectorization |
| 725 | /// on interleaved accesses is unsafe. |
| 726 | /// |
| 727 | /// The analysis collects interleave groups and records the relationships |
| 728 | /// between the member and the group in a map. |
| 729 | class InterleavedAccessInfo { |
| 730 | public: |
| 731 | InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, |
| 732 | DominatorTree *DT, LoopInfo *LI, |
| 733 | const LoopAccessInfo *LAI) |
| 734 | : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} |
| 735 | |
| 736 | ~InterleavedAccessInfo() { invalidateGroups(); } |
| 737 | |
| 738 | /// Analyze the interleaved accesses and collect them in interleave |
| 739 | /// groups. Substitute symbolic strides using \p Strides. |
| 740 | /// Consider also predicated loads/stores in the analysis if |
| 741 | /// \p EnableMaskedInterleavedGroup is true. |
| 742 | void analyzeInterleaving(bool EnableMaskedInterleavedGroup); |
| 743 | |
| 744 | /// Invalidate groups, e.g., in case all blocks in the loop will be predicated, |
| 745 | /// contrary to the original assumption. Although we currently prevent group |
| 746 | /// formation for predicated accesses, we may be able to relax this limitation |
| 747 | /// in the future once we handle more complicated blocks. Returns true if any |
| 748 | /// groups were invalidated. |
| 749 | bool invalidateGroups() { |
| 750 | if (InterleaveGroups.empty()) { |
| 751 | assert( |
| 752 | !RequiresScalarEpilogue && |
| 753 | "RequiresScalarEpilog should not be set without interleave groups"); |
| 754 | return false; |
| 755 | } |
| 756 | |
| 757 | InterleaveGroupMap.clear(); |
| 758 | for (auto *Ptr : InterleaveGroups) |
| 759 | delete Ptr; |
| 760 | InterleaveGroups.clear(); |
| 761 | RequiresScalarEpilogue = false; |
| 762 | return true; |
| 763 | } |
| 764 | |
| 765 | /// Check if \p Instr belongs to any interleave group. |
| 766 | bool isInterleaved(Instruction *Instr) const { |
| 767 | return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end(); |
| 768 | } |
| 769 | |
| 770 | /// Get the interleave group that \p Instr belongs to. |
| 771 | /// |
| 772 | /// \returns nullptr if it doesn't belong to such a group. |
| 773 | InterleaveGroup<Instruction> * |
| 774 | getInterleaveGroup(const Instruction *Instr) const { |
| 775 | return InterleaveGroupMap.lookup(Instr); |
| 776 | } |
| 777 | |
| 778 | iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>> |
| 779 | getInterleaveGroups() { |
| 780 | return make_range(InterleaveGroups.begin(), InterleaveGroups.end()); |
| 781 | } |
| 782 | |
| 783 | /// Returns true if an interleaved group that may access memory |
| 784 | /// out-of-bounds requires a scalar epilogue iteration for correctness. |
| 785 | bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } |
| 786 | |
| 787 | /// Invalidate groups that require a scalar epilogue (due to gaps). This can |
| 788 | /// happen when optimizing for size forbids a scalar epilogue, and the gap |
| 789 | /// cannot be filtered by masking the load/store. |
| 790 | void invalidateGroupsRequiringScalarEpilogue(); |
| 791 | |
| 792 | private: |
| 793 | /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. |
| 794 | /// Simplifies SCEV expressions in the context of existing SCEV assumptions. |
| 795 | /// The interleaved access analysis can also add new predicates (for example |
| 796 | /// by versioning strides of pointers). |
| 797 | PredicatedScalarEvolution &PSE; |
| 798 | |
| 799 | Loop *TheLoop; |
| 800 | DominatorTree *DT; |
| 801 | LoopInfo *LI; |
| 802 | const LoopAccessInfo *LAI; |
| 803 | |
| 804 | /// True if the loop may contain non-reversed interleaved groups with |
| 805 | /// out-of-bounds accesses. We ensure we don't speculatively access memory |
| 806 | /// out-of-bounds by executing at least one scalar epilogue iteration. |
| 807 | bool RequiresScalarEpilogue = false; |
| 808 | |
| 809 | /// Holds the relationships between the members and the interleave group. |
| 810 | DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap; |
| 811 | |
| 812 | SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups; |
| 813 | |
| 814 | /// Holds dependences among the memory accesses in the loop. It maps a source |
| 815 | /// access to a set of dependent sink accesses. |
| 816 | DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences; |
| 817 | |
| 818 | /// The descriptor for a strided memory access. |
| 819 | struct StrideDescriptor { |
| 820 | StrideDescriptor() = default; |
| 821 | StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, |
| 822 | Align Alignment) |
| 823 | : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {} |
| 824 | |
| 825 | // The access's stride. It is negative for a reverse access. |
| 826 | int64_t Stride = 0; |
| 827 | |
| 828 | // The scalar expression of this access. |
| 829 | const SCEV *Scev = nullptr; |
| 830 | |
| 831 | // The size of the memory object. |
| 832 | uint64_t Size = 0; |
| 833 | |
| 834 | // The alignment of this access. |
| 835 | Align Alignment; |
| 836 | }; |
| 837 | |
| 838 | /// A type for holding instructions and their stride descriptors. |
| 839 | using StrideEntry = std::pair<Instruction *, StrideDescriptor>; |
| 840 | |
| 841 | /// Create a new interleave group with the given instruction \p Instr, |
| 842 | /// stride \p Stride and alignment \p Align. |
| 843 | /// |
| 844 | /// \returns the newly created interleave group. |
| 845 | InterleaveGroup<Instruction> * |
| 846 | createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) { |
| 847 | assert(!InterleaveGroupMap.count(Instr) && |
| 848 | "Already in an interleaved access group"); |
| 849 | InterleaveGroupMap[Instr] = |
| 850 | new InterleaveGroup<Instruction>(Instr, Stride, Alignment); |
| 851 | InterleaveGroups.insert(InterleaveGroupMap[Instr]); |
| 852 | return InterleaveGroupMap[Instr]; |
| 853 | } |
| 854 | |
| 855 | /// Release the group and remove all the relationships. |
| 856 | void releaseGroup(InterleaveGroup<Instruction> *Group) { |
| 857 | for (unsigned i = 0; i < Group->getFactor(); i++) |
| 858 | if (Instruction *Member = Group->getMember(i)) |
| 859 | InterleaveGroupMap.erase(Member); |
| 860 | |
| 861 | InterleaveGroups.erase(Group); |
| 862 | delete Group; |
| 863 | } |
| 864 | |
| 865 | /// Collect all the accesses with a constant stride in program order. |
| 866 | void collectConstStrideAccesses( |
| 867 | MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo, |
| 868 | const ValueToValueMap &Strides); |
| 869 | |
| 870 | /// Returns true if \p Stride is allowed in an interleaved group. |
| 871 | static bool isStrided(int Stride); |
| 872 | |
| 873 | /// Returns true if \p BB is a predicated block. |
| 874 | bool isPredicated(BasicBlock *BB) const { |
| 875 | return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); |
| 876 | } |
| 877 | |
| 878 | /// Returns true if LoopAccessInfo can be used for dependence queries. |
| 879 | bool areDependencesValid() const { |
| 880 | return LAI && LAI->getDepChecker().getDependences(); |
| 881 | } |
| 882 | |
| 883 | /// Returns true if memory accesses \p A and \p B can be reordered, if |
| 884 | /// necessary, when constructing interleaved groups. |
| 885 | /// |
| 886 | /// \p A must precede \p B in program order. We return false if reordering is |
| 887 | /// not necessary or is prevented because \p A and \p B may be dependent. |
| 888 | bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A, |
| 889 | StrideEntry *B) const { |
| 890 | // Code motion for interleaved accesses can potentially hoist strided loads |
| 891 | // and sink strided stores. The code below checks the legality of the |
| 892 | // following two conditions: |
| 893 | // |
| 894 | // 1. Potentially moving a strided load (B) before any store (A) that |
| 895 | // precedes B, or |
| 896 | // |
| 897 | // 2. Potentially moving a strided store (A) after any load or store (B) |
| 898 | // that A precedes. |
| 899 | // |
| 900 | // It's legal to reorder A and B if we know there isn't a dependence from A |
| 901 | // to B. Note that this determination is conservative since some |
| 902 | // dependences could potentially be reordered safely. |
| 903 | |
| 904 | // A is potentially the source of a dependence. |
| 905 | auto *Src = A->first; |
| 906 | auto SrcDes = A->second; |
| 907 | |
| 908 | // B is potentially the sink of a dependence. |
| 909 | auto *Sink = B->first; |
| 910 | auto SinkDes = B->second; |
| 911 | |
| 912 | // Code motion for interleaved accesses can't violate WAR dependences. |
| 913 | // Thus, reordering is legal if the source isn't a write. |
| 914 | if (!Src->mayWriteToMemory()) |
| 915 | return true; |
| 916 | |
| 917 | // At least one of the accesses must be strided. |
| 918 | if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride)) |
| 919 | return true; |
| 920 | |
| 921 | // If dependence information is not available from LoopAccessInfo, |
| 922 | // conservatively assume the instructions can't be reordered. |
| 923 | if (!areDependencesValid()) |
| 924 | return false; |
| 925 | |
| 926 | // If we know there is a dependence from source to sink, assume the |
| 927 | // instructions can't be reordered. Otherwise, reordering is legal. |
| 928 | return Dependences.find(Src) == Dependences.end() || |
| 929 | !Dependences.lookup(Src).count(Sink); |
| 930 | } |
| 931 | |
| 932 | /// Collect the dependences from LoopAccessInfo. |
| 933 | /// |
| 934 | /// We process the dependences once during the interleaved access analysis to |
| 935 | /// enable constant-time dependence queries. |
| 936 | void collectDependences() { |
| 937 | if (!areDependencesValid()) |
| 938 | return; |
| 939 | auto *Deps = LAI->getDepChecker().getDependences(); |
| 940 | for (auto Dep : *Deps) |
| 941 | Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI)); |
| 942 | } |
| 943 | }; |
| 944 | |
| 945 | } // llvm namespace |
| 946 | |
| 947 | #endif |
| 1 | //===- llvm/ADT/DenseMap.h - Dense probed hash table ------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the DenseMap class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_ADT_DENSEMAP_H |
| 14 | #define LLVM_ADT_DENSEMAP_H |
| 15 | |
| 16 | #include "llvm/ADT/DenseMapInfo.h" |
| 17 | #include "llvm/ADT/EpochTracker.h" |
| 18 | #include "llvm/Support/AlignOf.h" |
| 19 | #include "llvm/Support/Compiler.h" |
| 20 | #include "llvm/Support/MathExtras.h" |
| 21 | #include "llvm/Support/MemAlloc.h" |
| 22 | #include "llvm/Support/ReverseIteration.h" |
| 23 | #include "llvm/Support/type_traits.h" |
| 24 | #include <algorithm> |
| 25 | #include <cassert> |
| 26 | #include <cstddef> |
| 27 | #include <cstring> |
| 28 | #include <initializer_list> |
| 29 | #include <iterator> |
| 30 | #include <new> |
| 31 | #include <type_traits> |
| 32 | #include <utility> |
| 33 | |
| 34 | namespace llvm { |
| 35 | |
| 36 | namespace detail { |
| 37 | |
| 38 | // We extend a pair to allow users to override the bucket type with their own |
| 39 | // implementation without requiring two members. |
| 40 | template <typename KeyT, typename ValueT> |
| 41 | struct DenseMapPair : public std::pair<KeyT, ValueT> { |
| 42 | using std::pair<KeyT, ValueT>::pair; |
| 43 | |
| 44 | KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; } |
| 45 | const KeyT &getFirst() const { return std::pair<KeyT, ValueT>::first; } |
| 46 | ValueT &getSecond() { return std::pair<KeyT, ValueT>::second; } |
| 47 | const ValueT &getSecond() const { return std::pair<KeyT, ValueT>::second; } |
| 48 | }; |
| 49 | |
| 50 | } // end namespace detail |
| 51 | |
| 52 | template <typename KeyT, typename ValueT, |
| 53 | typename KeyInfoT = DenseMapInfo<KeyT>, |
| 54 | typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>, |
| 55 | bool IsConst = false> |
| 56 | class DenseMapIterator; |
| 57 | |
| 58 | template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT, |
| 59 | typename BucketT> |
| 60 | class DenseMapBase : public DebugEpochBase { |
| 61 | template <typename T> |
| 62 | using const_arg_type_t = typename const_pointer_or_const_ref<T>::type; |
| 63 | |
| 64 | public: |
| 65 | using size_type = unsigned; |
| 66 | using key_type = KeyT; |
| 67 | using mapped_type = ValueT; |
| 68 | using value_type = BucketT; |
| 69 | |
| 70 | using iterator = DenseMapIterator<KeyT, ValueT, KeyInfoT, BucketT>; |
| 71 | using const_iterator = |
| 72 | DenseMapIterator<KeyT, ValueT, KeyInfoT, BucketT, true>; |
| 73 | |
| 74 | inline iterator begin() { |
| 75 | // When the map is empty, avoid the overhead of advancing/retreating past |
| 76 | // empty buckets. |
| 77 | if (empty()) |
| 78 | return end(); |
| 79 | if (shouldReverseIterate<KeyT>()) |
| 80 | return makeIterator(getBucketsEnd() - 1, getBuckets(), *this); |
| 81 | return makeIterator(getBuckets(), getBucketsEnd(), *this); |
| 82 | } |
| 83 | inline iterator end() { |
| 84 | return makeIterator(getBucketsEnd(), getBucketsEnd(), *this, true); |
| 85 | } |
| 86 | inline const_iterator begin() const { |
| 87 | if (empty()) |
| 88 | return end(); |
| 89 | if (shouldReverseIterate<KeyT>()) |
| 90 | return makeConstIterator(getBucketsEnd() - 1, getBuckets(), *this); |
| 91 | return makeConstIterator(getBuckets(), getBucketsEnd(), *this); |
| 92 | } |
| 93 | inline const_iterator end() const { |
| 94 | return makeConstIterator(getBucketsEnd(), getBucketsEnd(), *this, true); |
| 95 | } |
| 96 | |
| 97 | LLVM_NODISCARD bool empty() const { |
| 98 | return getNumEntries() == 0; |
| 99 | } |
| 100 | unsigned size() const { return getNumEntries(); } |
| 101 | |
| 102 | /// Grow the densemap so that it can contain at least \p NumEntries items |
| 103 | /// before resizing again. |
| 104 | void reserve(size_type NumEntries) { |
| 105 | auto NumBuckets = getMinBucketToReserveForEntries(NumEntries); |
| 106 | incrementEpoch(); |
| 107 | if (NumBuckets > getNumBuckets()) |
| 108 | grow(NumBuckets); |
| 109 | } |
| 110 | |
| 111 | void clear() { |
| 112 | incrementEpoch(); |
| 113 | if (getNumEntries() == 0 && getNumTombstones() == 0) return; |
| 114 | |
| 115 | // If the capacity of the array is huge, and the # elements used is small, |
| 116 | // shrink the array. |
| 117 | if (getNumEntries() * 4 < getNumBuckets() && getNumBuckets() > 64) { |
| 118 | shrink_and_clear(); |
| 119 | return; |
| 120 | } |
| 121 | |
| 122 | const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); |
| 123 | if (std::is_trivially_destructible<ValueT>::value) { |
| 124 | // Use a simpler loop when values don't need destruction. |
| 125 | for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) |
| 126 | P->getFirst() = EmptyKey; |
| 127 | } else { |
| 128 | unsigned NumEntries = getNumEntries(); |
| 129 | for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) { |
| 130 | if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey)) { |
| 131 | if (!KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) { |
| 132 | P->getSecond().~ValueT(); |
| 133 | --NumEntries; |
| 134 | } |
| 135 | P->getFirst() = EmptyKey; |
| 136 | } |
| 137 | } |
| 138 | assert(NumEntries == 0 && "Node count imbalance!"); |
| 139 | } |
| 140 | setNumEntries(0); |
| 141 | setNumTombstones(0); |
| 142 | } |
| 143 | |
| 144 | /// Return 1 if the specified key is in the map, 0 otherwise. |
| 145 | size_type count(const_arg_type_t<KeyT> Val) const { |
| 146 | const BucketT *TheBucket; |
| 147 | return LookupBucketFor(Val, TheBucket) ? 1 : 0; |
| 148 | } |
| 149 | |
| 150 | iterator find(const_arg_type_t<KeyT> Val) { |
| 151 | BucketT *TheBucket; |
| 152 | if (LookupBucketFor(Val, TheBucket)) |
| 153 | return makeIterator(TheBucket, |
| 154 | shouldReverseIterate<KeyT>() ? getBuckets() |
| 155 | : getBucketsEnd(), |
| 156 | *this, true); |
| 157 | return end(); |
| 158 | } |
| 159 | const_iterator find(const_arg_type_t<KeyT> Val) const { |
| 160 | const BucketT *TheBucket; |
| 161 | if (LookupBucketFor(Val, TheBucket)) |
| 162 | return makeConstIterator(TheBucket, |
| 163 | shouldReverseIterate<KeyT>() ? getBuckets() |
| 164 | : getBucketsEnd(), |
| 165 | *this, true); |
| 166 | return end(); |
| 167 | } |
| 168 | |
| 169 | /// Alternate version of find() which allows a different, and possibly |
| 170 | /// less expensive, key type. |
| 171 | /// The DenseMapInfo is responsible for supplying methods |
| 172 | /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key |
| 173 | /// type used. |
| 174 | template<class LookupKeyT> |
| 175 | iterator find_as(const LookupKeyT &Val) { |
| 176 | BucketT *TheBucket; |
| 177 | if (LookupBucketFor(Val, TheBucket)) |
| 178 | return makeIterator(TheBucket, |
| 179 | shouldReverseIterate<KeyT>() ? getBuckets() |
| 180 | : getBucketsEnd(), |
| 181 | *this, true); |
| 182 | return end(); |
| 183 | } |
| 184 | template<class LookupKeyT> |
| 185 | const_iterator find_as(const LookupKeyT &Val) const { |
| 186 | const BucketT *TheBucket; |
| 187 | if (LookupBucketFor(Val, TheBucket)) |
| 188 | return makeConstIterator(TheBucket, |
| 189 | shouldReverseIterate<KeyT>() ? getBuckets() |
| 190 | : getBucketsEnd(), |
| 191 | *this, true); |
| 192 | return end(); |
| 193 | } |
| 194 | |
| 195 | /// lookup - Return the entry for the specified key, or a default |
| 196 | /// constructed value if no such entry exists. |
| 197 | ValueT lookup(const_arg_type_t<KeyT> Val) const { |
| 198 | const BucketT *TheBucket; |
| 199 | if (LookupBucketFor(Val, TheBucket)) |
| 200 | return TheBucket->getSecond(); |
| 201 | return ValueT(); |
| 202 | } |
| 203 | |
| 204 | // Inserts key,value pair into the map if the key isn't already in the map. |
| 205 | // If the key is already in the map, it returns false and doesn't update the |
| 206 | // value. |
| 207 | std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) { |
| 208 | return try_emplace(KV.first, KV.second); |
| 209 | } |
| 210 | |
| 211 | // Inserts key,value pair into the map if the key isn't already in the map. |
| 212 | // If the key is already in the map, it returns false and doesn't update the |
| 213 | // value. |
| 214 | std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) { |
| 215 | return try_emplace(std::move(KV.first), std::move(KV.second)); |
| 216 | } |
| 217 | |
| 218 | // Inserts key,value pair into the map if the key isn't already in the map. |
| 219 | // The value is constructed in-place if the key is not in the map, otherwise |
| 220 | // it is not moved. |
| 221 | template <typename... Ts> |
| 222 | std::pair<iterator, bool> try_emplace(KeyT &&Key, Ts &&... Args) { |
| 223 | BucketT *TheBucket; |
| 224 | if (LookupBucketFor(Key, TheBucket)) |
| 225 | return std::make_pair(makeIterator(TheBucket, |
| 226 | shouldReverseIterate<KeyT>() |
| 227 | ? getBuckets() |
| 228 | : getBucketsEnd(), |
| 229 | *this, true), |
| 230 | false); // Already in map. |
| 231 | |
| 232 | // Otherwise, insert the new element. |
| 233 | TheBucket = |
| 234 | InsertIntoBucket(TheBucket, std::move(Key), std::forward<Ts>(Args)...); |
| 235 | return std::make_pair(makeIterator(TheBucket, |
| 236 | shouldReverseIterate<KeyT>() |
| 237 | ? getBuckets() |
| 238 | : getBucketsEnd(), |
| 239 | *this, true), |
| 240 | true); |
| 241 | } |
| 242 | |
| 243 | // Inserts key,value pair into the map if the key isn't already in the map. |
| 244 | // The value is constructed in-place if the key is not in the map, otherwise |
| 245 | // it is not moved. |
| 246 | template <typename... Ts> |
| 247 | std::pair<iterator, bool> try_emplace(const KeyT &Key, Ts &&... Args) { |
| 248 | BucketT *TheBucket; |
| 249 | if (LookupBucketFor(Key, TheBucket)) |
| 250 | return std::make_pair(makeIterator(TheBucket, |
| 251 | shouldReverseIterate<KeyT>() |
| 252 | ? getBuckets() |
| 253 | : getBucketsEnd(), |
| 254 | *this, true), |
| 255 | false); // Already in map. |
| 256 | |
| 257 | // Otherwise, insert the new element. |
| 258 | TheBucket = InsertIntoBucket(TheBucket, Key, std::forward<Ts>(Args)...); |
| 259 | return std::make_pair(makeIterator(TheBucket, |
| 260 | shouldReverseIterate<KeyT>() |
| 261 | ? getBuckets() |
| 262 | : getBucketsEnd(), |
| 263 | *this, true), |
| 264 | true); |
| 265 | } |
| 266 | |
| 267 | /// Alternate version of insert() which allows a different, and possibly |
| 268 | /// less expensive, key type. |
| 269 | /// The DenseMapInfo is responsible for supplying methods |
| 270 | /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key |
| 271 | /// type used. |
| 272 | template <typename LookupKeyT> |
| 273 | std::pair<iterator, bool> insert_as(std::pair<KeyT, ValueT> &&KV, |
| 274 | const LookupKeyT &Val) { |
| 275 | BucketT *TheBucket; |
| 276 | if (LookupBucketFor(Val, TheBucket)) |
| 277 | return std::make_pair(makeIterator(TheBucket, |
| 278 | shouldReverseIterate<KeyT>() |
| 279 | ? getBuckets() |
| 280 | : getBucketsEnd(), |
| 281 | *this, true), |
| 282 | false); // Already in map. |
| 283 | |
| 284 | // Otherwise, insert the new element. |
| 285 | TheBucket = InsertIntoBucketWithLookup(TheBucket, std::move(KV.first), |
| 286 | std::move(KV.second), Val); |
| 287 | return std::make_pair(makeIterator(TheBucket, |
| 288 | shouldReverseIterate<KeyT>() |
| 289 | ? getBuckets() |
| 290 | : getBucketsEnd(), |
| 291 | *this, true), |
| 292 | true); |
| 293 | } |
| 294 | |
| 295 | /// insert - Range insertion of pairs. |
| 296 | template<typename InputIt> |
| 297 | void insert(InputIt I, InputIt E) { |
| 298 | for (; I != E; ++I) |
| 299 | insert(*I); |
| 300 | } |
| 301 | |
| 302 | bool erase(const KeyT &Val) { |
| 303 | BucketT *TheBucket; |
| 304 | if (!LookupBucketFor(Val, TheBucket)) |
| 305 | return false; // not in map. |
| 306 | |
| 307 | TheBucket->getSecond().~ValueT(); |
| 308 | TheBucket->getFirst() = getTombstoneKey(); |
| 309 | decrementNumEntries(); |
| 310 | incrementNumTombstones(); |
| 311 | return true; |
| 312 | } |
| 313 | void erase(iterator I) { |
| 314 | BucketT *TheBucket = &*I; |
| 315 | TheBucket->getSecond().~ValueT(); |
| 316 | TheBucket->getFirst() = getTombstoneKey(); |
| 317 | decrementNumEntries(); |
| 318 | incrementNumTombstones(); |
| 319 | } |
| 320 | |
| 321 | value_type& FindAndConstruct(const KeyT &Key) { |
| 322 | BucketT *TheBucket; |
| 323 | if (LookupBucketFor(Key, TheBucket)) |
| 324 | return *TheBucket; |
| 325 | |
| 326 | return *InsertIntoBucket(TheBucket, Key); |
| 327 | } |
| 328 | |
| 329 | ValueT &operator[](const KeyT &Key) { |
| 330 | return FindAndConstruct(Key).second; |
| 331 | } |
| 332 | |
| 333 | value_type& FindAndConstruct(KeyT &&Key) { |
| 334 | BucketT *TheBucket; |
| 335 | if (LookupBucketFor(Key, TheBucket)) |
| 336 | return *TheBucket; |
| 337 | |
| 338 | return *InsertIntoBucket(TheBucket, std::move(Key)); |
| 339 | } |
| 340 | |
| 341 | ValueT &operator[](KeyT &&Key) { |
| 342 | return FindAndConstruct(std::move(Key)).second; |
| 343 | } |
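| | // Hedged usage sketch for the insertion and lookup interface above; it assumes |
| | // llvm/ADT/DenseMap.h is on the include path and uses int keys, for which a |
| | // DenseMapInfo specialization exists. |
| | #include "llvm/ADT/DenseMap.h" |
| | #include <string> |
| | static void sketchDenseMapUsage() { |
| | llvm::DenseMap<int, std::string> M; |
| | M.try_emplace(1, "one"); // constructs the value in place |
| | bool Inserted = M.insert({1, "uno"}).second; // key already present: false, value kept |
| | std::string V = M.lookup(2); // missing key -> default-constructed "" |
| | M[3] = "three"; // FindAndConstruct + assignment |
| | M.erase(1); // leaves a tombstone bucket behind |
| | (void)Inserted; |
| | (void)V; |
| | } |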
| 344 | |
| 345 | /// isPointerIntoBucketsArray - Return true if the specified pointer points |
| 346 | /// somewhere into the DenseMap's array of buckets (i.e. either to a key or |
| 347 | /// value in the DenseMap). |
| 348 | bool isPointerIntoBucketsArray(const void *Ptr) const { |
| 349 | return Ptr >= getBuckets() && Ptr < getBucketsEnd(); |
| 350 | } |
| 351 | |
| 352 | /// getPointerIntoBucketsArray() - Return an opaque pointer into the buckets |
| 353 | /// array. In conjunction with the previous method, this can be used to |
| 354 | /// determine whether an insertion caused the DenseMap to reallocate. |
| 355 | const void *getPointerIntoBucketsArray() const { return getBuckets(); } |
| 356 | |
| 357 | protected: |
| 358 | DenseMapBase() = default; |
| 359 | |
| 360 | void destroyAll() { |
| 361 | if (getNumBuckets() == 0) // Nothing to do. |
| 362 | return; |
| 363 | |
| 364 | const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); |
| 365 | for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) { |
| 366 | if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey) && |
| 367 | !KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) |
| 368 | P->getSecond().~ValueT(); |
| 369 | P->getFirst().~KeyT(); |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | void initEmpty() { |
| 374 | setNumEntries(0); |
| 375 | setNumTombstones(0); |
| 376 | |
| 377 | assert((getNumBuckets() & (getNumBuckets()-1)) == 0 && |
| 378 | "# initial buckets must be a power of two!"); |
| 379 | const KeyT EmptyKey = getEmptyKey(); |
| 380 | for (BucketT *B = getBuckets(), *E = getBucketsEnd(); B != E; ++B) |
| 381 | ::new (&B->getFirst()) KeyT(EmptyKey); |
| 382 | } |
| 383 | |
| 384 | /// Returns the number of buckets to allocate to ensure that the DenseMap can |
| 385 | /// accommodate \p NumEntries without need to grow(). |
| 386 | unsigned getMinBucketToReserveForEntries(unsigned NumEntries) { |
| 387 | // Ensure that "NumEntries * 4 < NumBuckets * 3" |
| 388 | if (NumEntries == 0) |
| 389 | return 0; |
| 390 | // +1 is required because of the strict equality. |
| 391 | // For example if NumEntries is 48, we need to return 128. |
| 392 | return NextPowerOf2(NumEntries * 4 / 3 + 1); |
| 393 | } |
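| | // Hedged arithmetic check of the sizing rule above ("NumEntries * 4 < NumBuckets |
| | // * 3"). Standalone sketch: the local helper rounds up to a power of two instead |
| | // of calling llvm::NextPowerOf2 (which returns the next power of two strictly |
| | // greater than its argument; for 65 both yield 128). |
| | static unsigned sketchMinBuckets(unsigned NumEntries) { |
| | if (NumEntries == 0) |
| | return 0; |
| | unsigned Need = NumEntries * 4 / 3 + 1; // e.g. 48 -> 65; +1 enforces the strict '<' |
| | unsigned Buckets = 1; |
| | while (Buckets < Need) // smallest power of two >= Need |
| | Buckets <<= 1; |
| | return Buckets; // 65 -> 128, and 48 * 4 = 192 < 128 * 3 = 384 |
| | } |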
| 394 | |
| 395 | void moveFromOldBuckets(BucketT *OldBucketsBegin, BucketT *OldBucketsEnd) { |
| 396 | initEmpty(); |
| 397 | |
| 398 | // Insert all the old elements. |
| 399 | const KeyT EmptyKey = getEmptyKey(); |
| 400 | const KeyT TombstoneKey = getTombstoneKey(); |
| 401 | for (BucketT *B = OldBucketsBegin, *E = OldBucketsEnd; B != E; ++B) { |
| 402 | if (!KeyInfoT::isEqual(B->getFirst(), EmptyKey) && |
| 403 | !KeyInfoT::isEqual(B->getFirst(), TombstoneKey)) { |
| 404 | // Insert the key/value into the new table. |
| 405 | BucketT *DestBucket; |
| 406 | bool FoundVal = LookupBucketFor(B->getFirst(), DestBucket); |
| 407 | (void)FoundVal; // silence warning. |
| 408 | assert(!FoundVal && "Key already in new map?"); |
| 409 | DestBucket->getFirst() = std::move(B->getFirst()); |
| 410 | ::new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond())); |
| 411 | incrementNumEntries(); |
| 412 | |
| 413 | // Free the value. |
| 414 | B->getSecond().~ValueT(); |
| 415 | } |
| 416 | B->getFirst().~KeyT(); |
| 417 | } |
| 418 | } |
| 419 | |
| 420 | template <typename OtherBaseT> |
| 421 | void copyFrom( |
| 422 | const DenseMapBase<OtherBaseT, KeyT, ValueT, KeyInfoT, BucketT> &other) { |
| 423 | assert(&other != this); |
| 424 | assert(getNumBuckets() == other.getNumBuckets()); |
| 425 | |
| 426 | setNumEntries(other.getNumEntries()); |
| 427 | setNumTombstones(other.getNumTombstones()); |
| 428 | |
| 429 | if (std::is_trivially_copyable<KeyT>::value && |
| 430 | std::is_trivially_copyable<ValueT>::value) |
| 431 | memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(), |
| 432 | getNumBuckets() * sizeof(BucketT)); |
| 433 | else |
| 434 | for (size_t i = 0; i < getNumBuckets(); ++i) { |
| 435 | ::new (&getBuckets()[i].getFirst()) |
| 436 | KeyT(other.getBuckets()[i].getFirst()); |
| 437 | if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) && |
| 438 | !KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey())) |
| 439 | ::new (&getBuckets()[i].getSecond()) |
| 440 | ValueT(other.getBuckets()[i].getSecond()); |
| 441 | } |
| 442 | } |
| 443 | |
| 444 | static unsigned getHashValue(const KeyT &Val) { |
| 445 | return KeyInfoT::getHashValue(Val); |
| 446 | } |
| 447 | |
| 448 | template<typename LookupKeyT> |
| 449 | static unsigned getHashValue(const LookupKeyT &Val) { |
| 450 | return KeyInfoT::getHashValue(Val); |
| 451 | } |
| 452 | |
| 453 | static const KeyT getEmptyKey() { |
| 454 | static_assert(std::is_base_of<DenseMapBase, DerivedT>::value, |
| 455 | "Must pass the derived type to this template!"); |
| 456 | return KeyInfoT::getEmptyKey(); |
| 457 | } |
| 458 | |
| 459 | static const KeyT getTombstoneKey() { |
| 460 | return KeyInfoT::getTombstoneKey(); |
| 461 | } |
| 462 | |
| 463 | private: |
| 464 | iterator makeIterator(BucketT *P, BucketT *E, |
| 465 | DebugEpochBase &Epoch, |
| 466 | bool NoAdvance=false) { |
| 467 | if (shouldReverseIterate<KeyT>()) { |
| 468 | BucketT *B = P == getBucketsEnd() ? getBuckets() : P + 1; |
| 469 | return iterator(B, E, Epoch, NoAdvance); |
| 470 | } |
| 471 | return iterator(P, E, Epoch, NoAdvance); |
| 472 | } |
| 473 | |
| 474 | const_iterator makeConstIterator(const BucketT *P, const BucketT *E, |
| 475 | const DebugEpochBase &Epoch, |
| 476 | const bool NoAdvance=false) const { |
| 477 | if (shouldReverseIterate<KeyT>()) { |
| 478 | const BucketT *B = P == getBucketsEnd() ? getBuckets() : P + 1; |
| 479 | return const_iterator(B, E, Epoch, NoAdvance); |
| 480 | } |
| 481 | return const_iterator(P, E, Epoch, NoAdvance); |
| 482 | } |
| 483 | |
| 484 | unsigned getNumEntries() const { |
| 485 | return static_cast<const DerivedT *>(this)->getNumEntries(); |
| 486 | } |
| 487 | |
| 488 | void setNumEntries(unsigned Num) { |
| 489 | static_cast<DerivedT *>(this)->setNumEntries(Num); |
| 490 | } |
| 491 | |
| 492 | void incrementNumEntries() { |
| 493 | setNumEntries(getNumEntries() + 1); |
| 494 | } |
| 495 | |
| 496 | void decrementNumEntries() { |
| 497 | setNumEntries(getNumEntries() - 1); |
| 498 | } |
| 499 | |
| 500 | unsigned getNumTombstones() const { |
| 501 | return static_cast<const DerivedT *>(this)->getNumTombstones(); |
| 502 | } |
| 503 | |
| 504 | void setNumTombstones(unsigned Num) { |
| 505 | static_cast<DerivedT *>(this)->setNumTombstones(Num); |
| 506 | } |
| 507 | |
| 508 | void incrementNumTombstones() { |
| 509 | setNumTombstones(getNumTombstones() + 1); |
| 510 | } |
| 511 | |
| 512 | void decrementNumTombstones() { |
| 513 | setNumTombstones(getNumTombstones() - 1); |
| 514 | } |
| 515 | |
| 516 | const BucketT *getBuckets() const { |
| 517 | return static_cast<const DerivedT *>(this)->getBuckets(); |
| 518 | } |
| 519 | |
| 520 | BucketT *getBuckets() { |
| 521 | return static_cast<DerivedT *>(this)->getBuckets(); |
| 522 | } |
| 523 | |
| 524 | unsigned getNumBuckets() const { |
| 525 | return static_cast<const DerivedT *>(this)->getNumBuckets(); |
| 526 | } |
| 527 | |
| 528 | BucketT *getBucketsEnd() { |
| 529 | return getBuckets() + getNumBuckets(); |
| 530 | } |
| 531 | |
| 532 | const BucketT *getBucketsEnd() const { |
| 533 | return getBuckets() + getNumBuckets(); |
| 534 | } |
| 535 | |
| 536 | void grow(unsigned AtLeast) { |
| 537 | static_cast<DerivedT *>(this)->grow(AtLeast); |
| 538 | } |
| 539 | |
| 540 | void shrink_and_clear() { |
| 541 | static_cast<DerivedT *>(this)->shrink_and_clear(); |
| 542 | } |
| 543 | |
| 544 | template <typename KeyArg, typename... ValueArgs> |
| 545 | BucketT *InsertIntoBucket(BucketT *TheBucket, KeyArg &&Key, |
| 546 | ValueArgs &&... Values) { |
| 547 | TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket); |
| 548 | |
| 549 | TheBucket->getFirst() = std::forward<KeyArg>(Key); |
| 550 | ::new (&TheBucket->getSecond()) ValueT(std::forward<ValueArgs>(Values)...); |
| 551 | return TheBucket; |
| 552 | } |
| 553 | |
| 554 | template <typename LookupKeyT> |
| 555 | BucketT *InsertIntoBucketWithLookup(BucketT *TheBucket, KeyT &&Key, |
| 556 | ValueT &&Value, LookupKeyT &Lookup) { |
| 557 | TheBucket = InsertIntoBucketImpl(Key, Lookup, TheBucket); |
| 558 | |
| 559 | TheBucket->getFirst() = std::move(Key); |
| 560 | ::new (&TheBucket->getSecond()) ValueT(std::move(Value)); |
| 561 | return TheBucket; |
| 562 | } |
| 563 | |
| 564 | template <typename LookupKeyT> |
| 565 | BucketT *InsertIntoBucketImpl(const KeyT &Key, const LookupKeyT &Lookup, |
| 566 | BucketT *TheBucket) { |
| 567 | incrementEpoch(); |
| 568 | |
| 569 | // If the load of the hash table is more than 3/4, or if fewer than 1/8 of |
| 570 | // the buckets are empty (meaning that many are filled with tombstones), |
| 571 | // grow the table. |
| 572 | // |
| 573 | // The latter case is tricky. For example, if we had one empty bucket with |
| 574 | // tons of tombstones, failing lookups (e.g. for insertion) would have to |
| 575 | // probe almost the entire table until it found the empty bucket. If the |
| 576 | // table were completely filled with tombstones, no lookup would ever succeed, |
| 577 | // causing infinite loops in lookup. |
| 578 | unsigned NewNumEntries = getNumEntries() + 1; |
| 579 | unsigned NumBuckets = getNumBuckets(); |
| 580 | if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) { |
| 581 | this->grow(NumBuckets * 2); |
| 582 | LookupBucketFor(Lookup, TheBucket); |
| 583 | NumBuckets = getNumBuckets(); |
| 584 | } else if (LLVM_UNLIKELY(NumBuckets-(NewNumEntries+getNumTombstones()) <= |
| 585 | NumBuckets/8)) { |
| 586 | this->grow(NumBuckets); |
| 587 | LookupBucketFor(Lookup, TheBucket); |
| 588 | } |
| 589 | assert(TheBucket); |
| 590 | |
| 591 | // Only update the state after we've grown our bucket space appropriately |
| 592 | // so that when growing buckets we have self-consistent entry count. |
| 593 | incrementNumEntries(); |
| 594 | |
| 595 | // If we are writing over a tombstone, remember this. |
| 596 | const KeyT EmptyKey = getEmptyKey(); |
| 597 | if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey)) |
| 598 | decrementNumTombstones(); |
| 599 | |
| 600 | return TheBucket; |
| 601 | } |
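The comment block inside InsertIntoBucketImpl describes the two triggers for growing the table; the arithmetic can be reproduced with a short standalone sketch. growthAction is a hypothetical helper, not part of DenseMap.

// Sketch of the growth policy above: grow to twice the size when the load
// factor would reach 3/4, or rehash at the same size when fewer than 1/8 of
// the buckets remain empty (too many tombstones).
#include <cstdio>

// 1 = double the bucket count, 2 = rehash at the same size to purge
// tombstones, 0 = insert without growing.
static int growthAction(unsigned NumEntries, unsigned NumTombstones,
                        unsigned NumBuckets) {
  unsigned NewNumEntries = NumEntries + 1;
  if (NewNumEntries * 4 >= NumBuckets * 3)
    return 1;
  if (NumBuckets - (NewNumEntries + NumTombstones) <= NumBuckets / 8)
    return 2;
  return 0;
}

int main() {
  // 48 live entries in 64 buckets: 49 * 4 >= 64 * 3, so the table doubles.
  std::printf("%d\n", growthAction(48, 0, 64));  // 1
  // 10 live entries plus 47 tombstones leave only 6 empty buckets (<= 64/8).
  std::printf("%d\n", growthAction(10, 47, 64)); // 2
  return 0;
}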
| 602 | |
| 603 | /// LookupBucketFor - Lookup the appropriate bucket for Val, returning it in |
| 604 | /// FoundBucket. If the bucket contains the key and a value, this returns |
| 605 | /// true, otherwise it returns a bucket with an empty marker or tombstone and |
| 606 | /// returns false. |
| 607 | template<typename LookupKeyT> |
| 608 | bool LookupBucketFor(const LookupKeyT &Val, |
| 609 | const BucketT *&FoundBucket) const { |
| 610 | const BucketT *BucketsPtr = getBuckets(); |
| 611 | const unsigned NumBuckets = getNumBuckets(); |
| 612 | |
| 613 | if (NumBuckets == 0) { |
| 614 | FoundBucket = nullptr; |
| 615 | return false; |
| 616 | } |
| 617 | |
| 618 | // FoundTombstone - Keep track of whether we find a tombstone while probing. |
| 619 | const BucketT *FoundTombstone = nullptr; |
| 620 | const KeyT EmptyKey = getEmptyKey(); |
| 621 | const KeyT TombstoneKey = getTombstoneKey(); |
| 622 | assert(!KeyInfoT::isEqual(Val, EmptyKey) && |
| 623 | !KeyInfoT::isEqual(Val, TombstoneKey) && |
| 624 | "Empty/Tombstone value shouldn't be inserted into map!"); |
| 625 | |
| 626 | unsigned BucketNo = getHashValue(Val) & (NumBuckets-1); |
| 627 | unsigned ProbeAmt = 1; |
| 628 | while (true) { |
| 629 | const BucketT *ThisBucket = BucketsPtr + BucketNo; |
| 630 | // Found Val's bucket? If so, return it. |
| 631 | if (LLVM_LIKELY(KeyInfoT::isEqual(Val, ThisBucket->getFirst()))) { |
| 632 | FoundBucket = ThisBucket; |
| 633 | return true; |
| 634 | } |
| 635 | |
| 636 | // If we found an empty bucket, the key doesn't exist in the map. Return |
| 637 | // false so the caller can insert it here (or into an earlier tombstone). |
| 638 | if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) { |
| 639 | // If we've already seen a tombstone while probing, fill it in instead |
| 640 | // of the empty bucket we eventually probed to. |
| 641 | FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket; |
| 642 | return false; |
| 643 | } |
| 644 | |
| 645 | // If this is a tombstone, remember it. If Val ends up not in the map, we |
| 646 | // prefer to return it rather than a bucket that would require more probing. |
| 647 | if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) && |
| 648 | !FoundTombstone) |
| 649 | FoundTombstone = ThisBucket; // Remember the first tombstone found. |
| 650 | |
| 651 | // Otherwise, it's a hash collision or a tombstone; continue quadratic |
| 652 | // probing. |
| 653 | BucketNo += ProbeAmt++; |
| 654 | BucketNo &= (NumBuckets-1); |
| 655 | } |
| 656 | } |
| 657 | |
| 658 | template <typename LookupKeyT> |
| 659 | bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) { |
| 660 | const BucketT *ConstFoundBucket; |
| 661 | bool Result = const_cast<const DenseMapBase *>(this) |
| 662 | ->LookupBucketFor(Val, ConstFoundBucket); |
| 663 | FoundBucket = const_cast<BucketT *>(ConstFoundBucket); |
| 664 | return Result; |
| 665 | } |
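For reference, the probe sequence used by LookupBucketFor — start at the masked hash, then advance by 1, 2, 3, ... modulo the power-of-two bucket count — can be reproduced with a short standalone sketch; the hash constant below is an arbitrary example value.

// Standalone illustration of the quadratic (triangular-number) probing loop above.
#include <cstdio>

int main() {
  const unsigned NumBuckets = 16;                      // must be a power of two
  unsigned BucketNo = 0x9e3779b9u & (NumBuckets - 1);  // initial slot
  unsigned ProbeAmt = 1;
  for (int I = 0; I != 8; ++I) {
    std::printf("probe %d -> bucket %u\n", I, BucketNo);
    BucketNo += ProbeAmt++;       // step grows by one on each collision
    BucketNo &= (NumBuckets - 1);
  }
  return 0;
}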
| 666 | |
| 667 | public: |
| 668 | /// Return the approximate size (in bytes) of the actual map. |
| 669 | /// This is just the raw memory used by DenseMap. |
| 670 | /// If entries are pointers to objects, the sizes of the referenced objects |
| 671 | /// are not included. |
| 672 | size_t getMemorySize() const { |
| 673 | return getNumBuckets() * sizeof(BucketT); |
| 674 | } |
| 675 | }; |
| 676 | |
| 677 | /// Equality comparison for DenseMap. |
| 678 | /// |
| 679 | /// Iterates over elements of LHS confirming that each (key, value) pair in LHS |
| 680 | /// is also in RHS, and that no additional pairs are in RHS. |
| 681 | /// Equivalent to N calls to RHS.find and N value comparisons. Amortized |
| 682 | /// complexity is linear; the worst case is O(N^2) (if every hash collides). |
| 683 | template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT, |
| 684 | typename BucketT> |
| 685 | bool operator==( |
| 686 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS, |
| 687 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) { |
| 688 | if (LHS.size() != RHS.size()) |
| 689 | return false; |
| 690 | |
| 691 | for (auto &KV : LHS) { |
| 692 | auto I = RHS.find(KV.first); |
| 693 | if (I == RHS.end() || I->second != KV.second) |
| 694 | return false; |
| 695 | } |
| 696 | |
| 697 | return true; |
| 698 | } |
| 699 | |
| 700 | /// Inequality comparison for DenseMap. |
| 701 | /// |
| 702 | /// Equivalent to !(LHS == RHS). See operator== for performance notes. |
| 703 | template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT, |
| 704 | typename BucketT> |
| 705 | bool operator!=( |
| 706 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS, |
| 707 | const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) { |
| 708 | return !(LHS == RHS); |
| 709 | } |
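A hedged usage sketch of the comparison operators defined above, assuming the LLVM ADT headers are available:

#include "llvm/ADT/DenseMap.h"
#include <cassert>

int main() {
  llvm::DenseMap<int, int> A, B;
  A[1] = 10;
  A[2] = 20;
  B[2] = 20;
  B[1] = 10;       // insertion order is irrelevant to equality
  assert(A == B);
  B[3] = 30;       // an extra pair in B breaks equality
  assert(A != B);
  return 0;
}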
| 710 | |
| 711 | template <typename KeyT, typename ValueT, |
| 712 | typename KeyInfoT = DenseMapInfo<KeyT>, |
| 713 | typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>> |
| 714 | class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>, |
| 715 | KeyT, ValueT, KeyInfoT, BucketT> { |
| 716 | friend class DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
| 717 | |
| 718 | // Lift some types from the dependent base class into this class for |
| 719 | // simplicity of referring to them. |
| 720 | using BaseT = DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
| 721 | |
| 722 | BucketT *Buckets; |
| 723 | unsigned NumEntries; |
| 724 | unsigned NumTombstones; |
| 725 | unsigned NumBuckets; |
| 726 | |
| 727 | public: |
| 728 | /// Create a DenseMap with an optional \p InitialReserve that guarantees that |
| 729 | /// this number of elements can be inserted in the map without grow(). |
| 730 | explicit DenseMap(unsigned InitialReserve = 0) { init(InitialReserve); } |
| 731 | |
| 732 | DenseMap(const DenseMap &other) : BaseT() { |
| 733 | init(0); |
| 734 | copyFrom(other); |
| 735 | } |
| 736 | |
| 737 | DenseMap(DenseMap &&other) : BaseT() { |
| 738 | init(0); |
| 739 | swap(other); |
| 740 | } |
| 741 | |
| 742 | template<typename InputIt> |
| 743 | DenseMap(const InputIt &I, const InputIt &E) { |
| 744 | init(std::distance(I, E)); |
| 745 | this->insert(I, E); |
| 746 | } |
| 747 | |
| 748 | DenseMap(std::initializer_list<typename BaseT::value_type> Vals) { |
| 749 | init(Vals.size()); |
| 750 | this->insert(Vals.begin(), Vals.end()); |
| 751 | } |
| 752 | |
| 753 | ~DenseMap() { |
| 754 | this->destroyAll(); |
| 755 | deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); |
| 756 | } |
| 757 | |
| 758 | void swap(DenseMap& RHS) { |
| 759 | this->incrementEpoch(); |
| 760 | RHS.incrementEpoch(); |
| 761 | std::swap(Buckets, RHS.Buckets); |
| 762 | std::swap(NumEntries, RHS.NumEntries); |
| 763 | std::swap(NumTombstones, RHS.NumTombstones); |
| 764 | std::swap(NumBuckets, RHS.NumBuckets); |
| 765 | } |
| 766 | |
| 767 | DenseMap& operator=(const DenseMap& other) { |
| 768 | if (&other != this) |
| 769 | copyFrom(other); |
| 770 | return *this; |
| 771 | } |
| 772 | |
| 773 | DenseMap& operator=(DenseMap &&other) { |
| 774 | this->destroyAll(); |
| 775 | deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); |
| 776 | init(0); |
| 777 | swap(other); |
| 778 | return *this; |
| 779 | } |
| 780 | |
| 781 | void copyFrom(const DenseMap& other) { |
| 782 | this->destroyAll(); |
| 783 | deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); |
| 784 | if (allocateBuckets(other.NumBuckets)) { |
| 785 | this->BaseT::copyFrom(other); |
| 786 | } else { |
| 787 | NumEntries = 0; |
| 788 | NumTombstones = 0; |
| 789 | } |
| 790 | } |
| 791 | |
| 792 | void init(unsigned InitNumEntries) { |
| 793 | auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries); |
| 794 | if (allocateBuckets(InitBuckets)) { |
| 795 | this->BaseT::initEmpty(); |
| 796 | } else { |
| 797 | NumEntries = 0; |
| 798 | NumTombstones = 0; |
| 799 | } |
| 800 | } |
| 801 | |
| 802 | void grow(unsigned AtLeast) { |
| 803 | unsigned OldNumBuckets = NumBuckets; |
| 804 | BucketT *OldBuckets = Buckets; |
| 805 | |
| 806 | allocateBuckets(std::max<unsigned>(64, static_cast<unsigned>(NextPowerOf2(AtLeast-1)))); |
| 807 | assert(Buckets); |
| 808 | if (!OldBuckets) { |
| 809 | this->BaseT::initEmpty(); |
| 810 | return; |
| 811 | } |
| 812 | |
| 813 | this->moveFromOldBuckets(OldBuckets, OldBuckets+OldNumBuckets); |
| 814 | |
| 815 | // Free the old table. |
| 816 | deallocate_buffer(OldBuckets, sizeof(BucketT) * OldNumBuckets, |
| 817 | alignof(BucketT)); |
| 818 | } |
| 819 | |
| 820 | void shrink_and_clear() { |
| 821 | unsigned OldNumBuckets = NumBuckets; |
| 822 | unsigned OldNumEntries = NumEntries; |
| 823 | this->destroyAll(); |
| 824 | |
| 825 | // Reduce the number of buckets. |
| 826 | unsigned NewNumBuckets = 0; |
| 827 | if (OldNumEntries) |
| 828 | NewNumBuckets = std::max(64, 1 << (Log2_32_Ceil(OldNumEntries) + 1)); |
| 829 | if (NewNumBuckets == NumBuckets) { |
| 830 | this->BaseT::initEmpty(); |
| 831 | return; |
| 832 | } |
| 833 | |
| 834 | deallocate_buffer(Buckets, sizeof(BucketT) * OldNumBuckets, |
| 835 | alignof(BucketT)); |
| 836 | init(NewNumBuckets); |
| 837 | } |
| 838 | |
| 839 | private: |
| 840 | unsigned getNumEntries() const { |
| 841 | return NumEntries; |
| 842 | } |
| 843 | |
| 844 | void setNumEntries(unsigned Num) { |
| 845 | NumEntries = Num; |
| 846 | } |
| 847 | |
| 848 | unsigned getNumTombstones() const { |
| 849 | return NumTombstones; |
| 850 | } |
| 851 | |
| 852 | void setNumTombstones(unsigned Num) { |
| 853 | NumTombstones = Num; |
| 854 | } |
| 855 | |
| 856 | BucketT *getBuckets() const { |
| 857 | return Buckets; |
| 858 | } |
| 859 | |
| 860 | unsigned getNumBuckets() const { |
| 861 | return NumBuckets; |
| 862 | } |
| 863 | |
| 864 | bool allocateBuckets(unsigned Num) { |
| 865 | NumBuckets = Num; |
| 866 | if (NumBuckets == 0) { |
| 867 | Buckets = nullptr; |
| 868 | return false; |
| 869 | } |
| 870 | |
| 871 | Buckets = static_cast<BucketT *>( |
| 872 | allocate_buffer(sizeof(BucketT) * NumBuckets, alignof(BucketT))); |
| 873 | return true; |
| 874 | } |
| 875 | }; |
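A hedged usage sketch of the DenseMap class above, assuming the LLVM ADT headers are available; it exercises the InitialReserve constructor, insertion, lookup, and getMemorySize:

#include "llvm/ADT/DenseMap.h"
#include <cstdio>

int main() {
  // InitialReserve guarantees this many insertions without a call to grow().
  llvm::DenseMap<unsigned, const char *> Names(/*InitialReserve=*/16);
  Names.insert({1u, "one"});
  Names[2u] = "two";

  auto It = Names.find(2u);
  if (It != Names.end())
    std::printf("found: %s\n", It->second);

  // Raw bucket memory only; pointed-to objects are not counted.
  std::printf("buckets use %zu bytes\n", Names.getMemorySize());
  return 0;
}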
| 876 | |
| 877 | template <typename KeyT, typename ValueT, unsigned InlineBuckets = 4, |
| 878 | typename KeyInfoT = DenseMapInfo<KeyT>, |
| 879 | typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>> |
| 880 | class SmallDenseMap |
| 881 | : public DenseMapBase< |
| 882 | SmallDenseMap<KeyT, ValueT, InlineBuckets, KeyInfoT, BucketT>, KeyT, |
| 883 | ValueT, KeyInfoT, BucketT> { |
| 884 | friend class DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
| 885 | |
| 886 | // Lift some types from the dependent base class into this class for |
| 887 | // simplicity of referring to them. |
| 888 | using BaseT = DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT>; |
| 889 | |
| 890 | static_assert(isPowerOf2_64(InlineBuckets), |
| 891 | "InlineBuckets must be a power of 2."); |
| 892 | |
| 893 | unsigned Small : 1; |
| 894 | unsigned NumEntries : 31; |
| 895 | unsigned NumTombstones; |
| 896 | |
| 897 | struct LargeRep { |
| 898 | BucketT *Buckets; |
| 899 | unsigned NumBuckets; |
| 900 | }; |
| 901 | |
| 902 | /// A "union" of an inline bucket array and the struct representing |
| 903 | /// a large bucket. This union will be discriminated by the 'Small' bit. |
| 904 | AlignedCharArrayUnion<BucketT[InlineBuckets], LargeRep> storage; |
| 905 | |
| 906 | public: |
| 907 | explicit SmallDenseMap(unsigned NumInitBuckets = 0) { |
| 908 | init(NumInitBuckets); |
| 909 | } |
| 910 | |
| 911 | SmallDenseMap(const SmallDenseMap &other) : BaseT() { |
| 912 | init(0); |
| 913 | copyFrom(other); |
| 914 | } |
| 915 | |
| 916 | SmallDenseMap(SmallDenseMap &&other) : BaseT() { |
| 917 | init(0); |
| 918 | swap(other); |
| 919 | } |
| 920 | |
| 921 | template<typename InputIt> |
| 922 | SmallDenseMap(const InputIt &I, const InputIt &E) { |
| 923 | init(NextPowerOf2(std::distance(I, E))); |
| 924 | this->insert(I, E); |
| 925 | } |
| 926 | |
| 927 | SmallDenseMap(std::initializer_list<typename BaseT::value_type> Vals) |
| 928 | : SmallDenseMap(Vals.begin(), Vals.end()) {} |
| 929 | |
| 930 | ~SmallDenseMap() { |
| 931 | this->destroyAll(); |
| 932 | deallocateBuckets(); |
| 933 | } |
| 934 | |
| 935 | void swap(SmallDenseMap& RHS) { |
| 936 | unsigned TmpNumEntries = RHS.NumEntries; |
| 937 | RHS.NumEntries = NumEntries; |
| 938 | NumEntries = TmpNumEntries; |
| 939 | std::swap(NumTombstones, RHS.NumTombstones); |
| 940 | |
| 941 | const KeyT EmptyKey = this->getEmptyKey(); |
| 942 | const KeyT TombstoneKey = this->getTombstoneKey(); |
| 943 | if (Small && RHS.Small) { |
| 944 | // If we're swapping inline bucket arrays, we have to cope with some of |
| 945 | // the tricky bits of DenseMap's storage system: the buckets are not |
| 946 | // fully initialized. Thus we swap every key, but we may have |
| 947 | // a one-directional move of the value. |
| 948 | for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { |
| 949 | BucketT *LHSB = &getInlineBuckets()[i], |
| 950 | *RHSB = &RHS.getInlineBuckets()[i]; |
| 951 | bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) && |
| 952 | !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey)); |
| 953 | bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) && |
| 954 | !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey)); |
| 955 | if (hasLHSValue && hasRHSValue) { |
| 956 | // Swap together if we can... |
| 957 | std::swap(*LHSB, *RHSB); |
| 958 | continue; |
| 959 | } |
| 960 | // Swap separately and handle any asymmetry. |
| 961 | std::swap(LHSB->getFirst(), RHSB->getFirst()); |
| 962 | if (hasLHSValue) { |
| 963 | ::new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond())); |
| 964 | LHSB->getSecond().~ValueT(); |
| 965 | } else if (hasRHSValue) { |
| 966 | ::new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond())); |
| 967 | RHSB->getSecond().~ValueT(); |
| 968 | } |
| 969 | } |
| 970 | return; |
| 971 | } |
| 972 | if (!Small && !RHS.Small) { |
| 973 | std::swap(getLargeRep()->Buckets, RHS.getLargeRep()->Buckets); |
| 974 | std::swap(getLargeRep()->NumBuckets, RHS.getLargeRep()->NumBuckets); |
| 975 | return; |
| 976 | } |
| 977 | |
| 978 | SmallDenseMap &SmallSide = Small ? *this : RHS; |
| 979 | SmallDenseMap &LargeSide = Small ? RHS : *this; |
| 980 | |
| 981 | // First stash the large side's rep and move the small side across. |
| 982 | LargeRep TmpRep = std::move(*LargeSide.getLargeRep()); |
| 983 | LargeSide.getLargeRep()->~LargeRep(); |
| 984 | LargeSide.Small = true; |
| 985 | // This is similar to the standard move-from-old-buckets, but the bucket |
| 986 | // count hasn't actually rotated in this case. So we have to carefully |
| 987 | // move construct the keys and values into their new locations, but there |
| 988 | // is no need to re-hash things. |
| 989 | for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { |
| 990 | BucketT *NewB = &LargeSide.getInlineBuckets()[i], |
| 991 | *OldB = &SmallSide.getInlineBuckets()[i]; |
| 992 | ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst())); |
| 993 | OldB->getFirst().~KeyT(); |
| 994 | if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) && |
| 995 | !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) { |
| 996 | ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond())); |
| 997 | OldB->getSecond().~ValueT(); |
| 998 | } |
| 999 | } |
| 1000 | |
| 1001 | // The hard part of moving the small buckets across is done, just move |
| 1002 | // the TmpRep into its new home. |
| 1003 | SmallSide.Small = false; |
| 1004 | new (SmallSide.getLargeRep()) LargeRep(std::move(TmpRep)); |
| 1005 | } |
| 1006 | |
| 1007 | SmallDenseMap& operator=(const SmallDenseMap& other) { |
| 1008 | if (&other != this) |
| 1009 | copyFrom(other); |
| 1010 | return *this; |
| 1011 | } |
| 1012 | |
| 1013 | SmallDenseMap& operator=(SmallDenseMap &&other) { |
| 1014 | this->destroyAll(); |
| 1015 | deallocateBuckets(); |
| 1016 | init(0); |
| 1017 | swap(other); |
| 1018 | return *this; |
| 1019 | } |
| 1020 | |
| 1021 | void copyFrom(const SmallDenseMap& other) { |
| 1022 | this->destroyAll(); |
| 1023 | deallocateBuckets(); |
| 1024 | Small = true; |
| 1025 | if (other.getNumBuckets() > InlineBuckets) { |
| 1026 | Small = false; |
| 1027 | new (getLargeRep()) LargeRep(allocateBuckets(other.getNumBuckets())); |
| 1028 | } |
| 1029 | this->BaseT::copyFrom(other); |
| 1030 | } |
| 1031 | |
| 1032 | void init(unsigned InitBuckets) { |
| 1033 | Small = true; |
| 1034 | if (InitBuckets > InlineBuckets) { |
| 1035 | Small = false; |
| 1036 | new (getLargeRep()) LargeRep(allocateBuckets(InitBuckets)); |
| 1037 | } |
| 1038 | this->BaseT::initEmpty(); |
| 1039 | } |
| 1040 | |
| 1041 | void grow(unsigned AtLeast) { |
| 1042 | if (AtLeast > InlineBuckets) |
| 1043 | AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast-1)); |
| 1044 | |
| 1045 | if (Small) { |
| 1046 | // First move the inline buckets into a temporary storage. |
| 1047 | AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage; |
| 1048 | BucketT *TmpBegin = reinterpret_cast<BucketT *>(&TmpStorage); |
| 1049 | BucketT *TmpEnd = TmpBegin; |
| 1050 | |
| 1051 | // Loop over the buckets, moving non-empty, non-tombstones into the |
| 1052 | // temporary storage. Have the loop move the TmpEnd forward as it goes. |
| 1053 | const KeyT EmptyKey = this->getEmptyKey(); |
| 1054 | const KeyT TombstoneKey = this->getTombstoneKey(); |
| 1055 | for (BucketT *P = getBuckets(), *E = P + InlineBuckets; P != E; ++P) { |
| 1056 | if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey) && |
| 1057 | !KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) { |
| 1058 | assert(size_t(TmpEnd - TmpBegin) < InlineBuckets && |
| 1059 | "Too many inline buckets!"); |
| 1060 | ::new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst())); |
| 1061 | ::new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond())); |
| 1062 | ++TmpEnd; |
| 1063 | P->getSecond().~ValueT(); |
| 1064 | } |
| 1065 | P->getFirst().~KeyT(); |
| 1066 | } |
| 1067 | |
| 1068 | // AtLeast == InlineBuckets can happen if there are many tombstones, |
| 1069 | // and grow() is used to remove them. In every other case we switch to the |
| 1070 | // large rep here. |
| 1071 | if (AtLeast > InlineBuckets) { |
| 1072 | Small = false; |
| 1073 | new (getLargeRep()) LargeRep(allocateBuckets(AtLeast)); |
| 1074 | } |
| 1075 | this->moveFromOldBuckets(TmpBegin, TmpEnd); |
| 1076 | return; |
| 1077 | } |
| 1078 | |
| 1079 | LargeRep OldRep = std::move(*getLargeRep()); |
| 1080 | getLargeRep()->~LargeRep(); |
| 1081 | if (AtLeast <= InlineBuckets) { |
| 1082 | Small = true; |
| 1083 | } else { |
| 1084 | new (getLargeRep()) LargeRep(allocateBuckets(AtLeast)); |
| 1085 | } |
| 1086 | |
| 1087 | this->moveFromOldBuckets(OldRep.Buckets, OldRep.Buckets+OldRep.NumBuckets); |
| 1088 | |
| 1089 | // Free the old table. |
| 1090 | deallocate_buffer(OldRep.Buckets, sizeof(BucketT) * OldRep.NumBuckets, |
| 1091 | alignof(BucketT)); |
| 1092 | } |
| 1093 | |
| 1094 | void shrink_and_clear() { |
| 1095 | unsigned OldSize = this->size(); |
| 1096 | this->destroyAll(); |
| 1097 | |
| 1098 | // Reduce the number of buckets. |
| 1099 | unsigned NewNumBuckets = 0; |
| 1100 | if (OldSize) { |
| 1101 | NewNumBuckets = 1 << (Log2_32_Ceil(OldSize) + 1); |
| 1102 | if (NewNumBuckets > InlineBuckets && NewNumBuckets < 64u) |
| 1103 | NewNumBuckets = 64; |
| 1104 | } |
| 1105 | if ((Small && NewNumBuckets <= InlineBuckets) || |
| 1106 | (!Small && NewNumBuckets == getLargeRep()->NumBuckets)) { |
| 1107 | this->BaseT::initEmpty(); |
| 1108 | return; |
| 1109 | } |
| 1110 | |
| 1111 | deallocateBuckets(); |
| 1112 | init(NewNumBuckets); |
| 1113 | } |
| 1114 | |
| 1115 | private: |
| 1116 | unsigned getNumEntries() const { |
| 1117 | return NumEntries; |
| 1118 | } |
| 1119 | |
| 1120 | void setNumEntries(unsigned Num) { |
| 1121 | // NumEntries is hardcoded to be 31 bits wide. |
| 1122 | assert(Num < (1U << 31) && "Cannot support more than 1<<31 entries"); |
| 1123 | NumEntries = Num; |
| 1124 | } |
| 1125 | |
| 1126 | unsigned getNumTombstones() const { |
| 1127 | return NumTombstones; |
| 1128 | } |
| 1129 | |
| 1130 | void setNumTombstones(unsigned Num) { |
| 1131 | NumTombstones = Num; |
| 1132 | } |
| 1133 | |
| 1134 | const BucketT *getInlineBuckets() const { |
| 1135 | assert(Small); |
| 1136 | // Note that this cast does not violate aliasing rules as we assert that |
| 1137 | // the memory's dynamic type is the small, inline bucket buffer, and the |
| 1138 | // 'storage' is a POD containing a char buffer. |
| 1139 | return reinterpret_cast<const BucketT *>(&storage); |
| 1140 | } |
| 1141 | |
| 1142 | BucketT *getInlineBuckets() { |
| 1143 | return const_cast<BucketT *>( |
| 1144 | const_cast<const SmallDenseMap *>(this)->getInlineBuckets()); |
| 1145 | } |
| 1146 | |
| 1147 | const LargeRep *getLargeRep() const { |
| 1148 | assert(!Small); |
| 1149 | // Note, same rule about aliasing as with getInlineBuckets. |
| 1150 | return reinterpret_cast<const LargeRep *>(&storage); |
| 1151 | } |
| 1152 | |
| 1153 | LargeRep *getLargeRep() { |
| 1154 | return const_cast<LargeRep *>( |
| 1155 | const_cast<const SmallDenseMap *>(this)->getLargeRep()); |
| 1156 | } |
| 1157 | |
| 1158 | const BucketT *getBuckets() const { |
| 1159 | return Small ? getInlineBuckets() : getLargeRep()->Buckets; |
| 1160 | } |
| 1161 | |
| 1162 | BucketT *getBuckets() { |
| 1163 | return const_cast<BucketT *>( |
| 1164 | const_cast<const SmallDenseMap *>(this)->getBuckets()); |
| 1165 | } |
| 1166 | |
| 1167 | unsigned getNumBuckets() const { |
| 1168 | return Small ? InlineBuckets : getLargeRep()->NumBuckets; |
| 1169 | } |
| 1170 | |
| 1171 | void deallocateBuckets() { |
| 1172 | if (Small) |
| 1173 | return; |
| 1174 | |
| 1175 | deallocate_buffer(getLargeRep()->Buckets, |
| 1176 | sizeof(BucketT) * getLargeRep()->NumBuckets, |
| 1177 | alignof(BucketT)); |
| 1178 | getLargeRep()->~LargeRep(); |
| 1179 | } |
| 1180 | |
| 1181 | LargeRep allocateBuckets(unsigned Num) { |
| 1182 | assert(Num > InlineBuckets && "Must allocate more buckets than are inline"); |
| 1183 | LargeRep Rep = {static_cast<BucketT *>(allocate_buffer( |
| 1184 | sizeof(BucketT) * Num, alignof(BucketT))), |
| 1185 | Num}; |
| 1186 | return Rep; |
| 1187 | } |
| 1188 | }; |
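A hedged usage sketch of SmallDenseMap, assuming the LLVM ADT headers are available; while the map stays within its inline buckets it performs no heap allocation, and further growth moves the buckets into the heap-allocated LargeRep:

#include "llvm/ADT/DenseMap.h"
#include <cstdio>

int main() {
  // Four buckets live inside the object itself.
  llvm::SmallDenseMap<int, int, 4> Squares;
  Squares[1] = 1;
  Squares[2] = 4;
  // Growing past what the inline buckets can hold (subject to the 3/4 load
  // factor) switches the map to heap-allocated buckets transparently.
  for (int I = 3; I <= 10; ++I)
    Squares[I] = I * I;
  std::printf("size = %u\n", static_cast<unsigned>(Squares.size()));
  return 0;
}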
| 1189 | |
| 1190 | template <typename KeyT, typename ValueT, typename KeyInfoT, typename Bucket, |
| 1191 | bool IsConst> |
| 1192 | class DenseMapIterator : DebugEpochBase::HandleBase { |
| 1193 | friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true>; |
| 1194 | friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, false>; |
| 1195 | |
| 1196 | public: |
| 1197 | using difference_type = ptrdiff_t; |
| 1198 | using value_type = |
| 1199 | typename std::conditional<IsConst, const Bucket, Bucket>::type; |
| 1200 | using pointer = value_type *; |
| 1201 | using reference = value_type &; |
| 1202 | using iterator_category = std::forward_iterator_tag; |
| 1203 | |
| 1204 | private: |
| 1205 | pointer Ptr = nullptr; |
| 1206 | pointer End = nullptr; |
| 1207 | |
| 1208 | public: |
| 1209 | DenseMapIterator() = default; |
| 1210 | |
| 1211 | DenseMapIterator(pointer Pos, pointer E, const DebugEpochBase &Epoch, |
| 1212 | bool NoAdvance = false) |
| 1213 | : DebugEpochBase::HandleBase(&Epoch), Ptr(Pos), End(E) { |
| 1214 | assert(isHandleInSync() && "invalid construction!"); |
| 1215 | |
| 1216 | if (NoAdvance) return; |
| 1217 | if (shouldReverseIterate<KeyT>()) { |
| 1218 | RetreatPastEmptyBuckets(); |
| 1219 | return; |
| 1220 | } |
| 1221 | AdvancePastEmptyBuckets(); |
| 1222 | } |
| 1223 | |
| 1224 | // Converting ctor from non-const iterators to const iterators. SFINAE'd out |
| 1225 | // for const iterator destinations so it doesn't end up as a user defined copy |
| 1226 | // constructor. |
| 1227 | template <bool IsConstSrc, |
| 1228 | typename = std::enable_if_t<!IsConstSrc && IsConst>> |
| 1229 | DenseMapIterator( |
| 1230 | const DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, IsConstSrc> &I) |
| 1231 | : DebugEpochBase::HandleBase(I), Ptr(I.Ptr), End(I.End) {} |
| 1232 | |
| 1233 | reference operator*() const { |
| 1234 | assert(isHandleInSync() && "invalid iterator access!"); |
| 1235 | assert(Ptr != End && "dereferencing end() iterator"); |
| 1236 | if (shouldReverseIterate<KeyT>()) |
| 1237 | return Ptr[-1]; |
| 1238 | return *Ptr; |
| 1239 | } |
| 1240 | pointer operator->() const { |
| 1241 | assert(isHandleInSync() && "invalid iterator access!"); |
| 1242 | assert(Ptr != End && "dereferencing end() iterator"); |
| 1243 | if (shouldReverseIterate<KeyT>()) |
| 1244 | return &(Ptr[-1]); |
| 1245 | return Ptr; |
| 1246 | } |
| 1247 | |
| 1248 | friend bool operator==(const DenseMapIterator &LHS, |
| 1249 | const DenseMapIterator &RHS) { |
| 1250 | assert((!LHS.Ptr || LHS.isHandleInSync()) && "handle not in sync!"); |
| 1251 | assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!"); |
| 1252 | assert(LHS.getEpochAddress() == RHS.getEpochAddress() && |
| 1253 | "comparing incomparable iterators!"); |
| 1254 | return LHS.Ptr == RHS.Ptr; |
| 1255 | } |
| 1256 | |
| 1257 | friend bool operator!=(const DenseMapIterator &LHS, |
| 1258 | const DenseMapIterator &RHS) { |
| 1259 | return !(LHS == RHS); |
| 1260 | } |
| 1261 | |
| 1262 | inline DenseMapIterator& operator++() { // Preincrement |
| 1263 | assert(isHandleInSync() && "invalid iterator access!"); |
| 1264 | assert(Ptr != End && "incrementing end() iterator"); |
| 1265 | if (shouldReverseIterate<KeyT>()) { |
| 1266 | --Ptr; |
| 1267 | RetreatPastEmptyBuckets(); |
| 1268 | return *this; |
| 1269 | } |
| 1270 | ++Ptr; |
| 1271 | AdvancePastEmptyBuckets(); |
| 1272 | return *this; |
| 1273 | } |
| 1274 | DenseMapIterator operator++(int) { // Postincrement |
| 1275 | assert(isHandleInSync() && "invalid iterator access!"); |
| 1276 | DenseMapIterator tmp = *this; ++*this; return tmp; |
| 1277 | } |
| 1278 | |
| 1279 | private: |
| 1280 | void AdvancePastEmptyBuckets() { |
| 1281 | assert(Ptr <= End); |
| 1282 | const KeyT Empty = KeyInfoT::getEmptyKey(); |
| 1283 | const KeyT Tombstone = KeyInfoT::getTombstoneKey(); |
| 1284 | |
| 1285 | while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) || |
| 1286 | KeyInfoT::isEqual(Ptr->getFirst(), Tombstone))) |
| 1287 | ++Ptr; |
| 1288 | } |
| 1289 | |
| 1290 | void RetreatPastEmptyBuckets() { |
| 1291 | assert(Ptr >= End); |
| 1292 | const KeyT Empty = KeyInfoT::getEmptyKey(); |
| 1293 | const KeyT Tombstone = KeyInfoT::getTombstoneKey(); |
| 1294 | |
| 1295 | while (Ptr != End && (KeyInfoT::isEqual(Ptr[-1].getFirst(), Empty) || |
| 1296 | KeyInfoT::isEqual(Ptr[-1].getFirst(), Tombstone))) |
| 1297 | --Ptr; |
| 1298 | } |
| 1299 | }; |
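A hedged iteration sketch, assuming the LLVM ADT headers are available; the iterator above skips empty and tombstone buckets, and its debug-epoch handle catches iteration across a rehash in asserts-enabled builds:

#include "llvm/ADT/DenseMap.h"
#include <cstdio>

int main() {
  llvm::DenseMap<int, int> M;
  for (int I = 0; I != 5; ++I)
    M[I] = I * 10;
  M.erase(3); // leaves a tombstone bucket; iteration skips it

  // Visits only live entries, in no particular order.
  for (const auto &KV : M)
    std::printf("%d -> %d\n", KV.first, KV.second);
  return 0;
}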
| 1300 | |
| 1301 | template <typename KeyT, typename ValueT, typename KeyInfoT> |
| 1302 | inline size_t capacity_in_bytes(const DenseMap<KeyT, ValueT, KeyInfoT> &X) { |
| 1303 | return X.getMemorySize(); |
| 1304 | } |
| 1305 | |
| 1306 | } // end namespace llvm |
| 1307 | |
| 1308 | #endif // LLVM_ADT_DENSEMAP_H |