| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h |
| Warning: | line 85, column 47 The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
| 1 | //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file contains a pass (at IR level) to replace atomic instructions with | |||
| 10 | // __atomic_* library calls, or target-specific instructions which implement the | |||
| 11 | // same semantics in a way which better fits the target backend. This can | |||
| 12 | // include the use of (intrinsic-based) load-linked/store-conditional loops, | |||
| 13 | // AtomicCmpXchg, or type coercions. | |||
| 14 | // | |||
| 15 | //===----------------------------------------------------------------------===// | |||
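| | // As a rough illustration (not taken from the upstream sources), on a target | |||
| | // that requests CmpXChg expansion for atomicrmw, an instruction such as | |||
| | //   %old = atomicrmw add i32* %p, i32 1 seq_cst | |||
| | // ends up as a compare-exchange loop of approximately this shape: | |||
| | //   atomicrmw.start: | |||
| | //     %loaded = phi i32 [ %init, %entry ], [ %newloaded, %atomicrmw.start ] | |||
| | //     %new = add i32 %loaded, 1 | |||
| | //     %pair = cmpxchg i32* %p, i32 %loaded, i32 %new seq_cst seq_cst | |||
| | //     %newloaded = extractvalue { i32, i1 } %pair, 0 | |||
| | //     %success = extractvalue { i32, i1 } %pair, 1 | |||
| | //     br i1 %success, label %atomicrmw.end, label %atomicrmw.start | |||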
| 16 | ||||
| 17 | #include "llvm/ADT/ArrayRef.h" | |||
| 18 | #include "llvm/ADT/STLExtras.h" | |||
| 19 | #include "llvm/ADT/SmallVector.h" | |||
| 20 | #include "llvm/CodeGen/AtomicExpandUtils.h" | |||
| 21 | #include "llvm/CodeGen/RuntimeLibcalls.h" | |||
| 22 | #include "llvm/CodeGen/TargetLowering.h" | |||
| 23 | #include "llvm/CodeGen/TargetPassConfig.h" | |||
| 24 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | |||
| 25 | #include "llvm/CodeGen/ValueTypes.h" | |||
| 26 | #include "llvm/IR/Attributes.h" | |||
| 27 | #include "llvm/IR/BasicBlock.h" | |||
| 28 | #include "llvm/IR/Constant.h" | |||
| 29 | #include "llvm/IR/Constants.h" | |||
| 30 | #include "llvm/IR/DataLayout.h" | |||
| 31 | #include "llvm/IR/DerivedTypes.h" | |||
| 32 | #include "llvm/IR/Function.h" | |||
| 33 | #include "llvm/IR/IRBuilder.h" | |||
| 34 | #include "llvm/IR/InstIterator.h" | |||
| 35 | #include "llvm/IR/Instruction.h" | |||
| 36 | #include "llvm/IR/Instructions.h" | |||
| 37 | #include "llvm/IR/Module.h" | |||
| 38 | #include "llvm/IR/Type.h" | |||
| 39 | #include "llvm/IR/User.h" | |||
| 40 | #include "llvm/IR/Value.h" | |||
| 41 | #include "llvm/InitializePasses.h" | |||
| 42 | #include "llvm/Pass.h" | |||
| 43 | #include "llvm/Support/AtomicOrdering.h" | |||
| 44 | #include "llvm/Support/Casting.h" | |||
| 45 | #include "llvm/Support/Debug.h" | |||
| 46 | #include "llvm/Support/ErrorHandling.h" | |||
| 47 | #include "llvm/Support/raw_ostream.h" | |||
| 48 | #include "llvm/Target/TargetMachine.h" | |||
| 49 | #include <cassert> | |||
| 50 | #include <cstdint> | |||
| 51 | #include <iterator> | |||
| 52 | ||||
| 53 | using namespace llvm; | |||
| 54 | ||||
| 55 | #define DEBUG_TYPE"atomic-expand" "atomic-expand" | |||
| 56 | ||||
| 57 | namespace { | |||
| 58 | ||||
| 59 | class AtomicExpand: public FunctionPass { | |||
| 60 | const TargetLowering *TLI = nullptr; | |||
| 61 | ||||
| 62 | public: | |||
| 63 | static char ID; // Pass identification, replacement for typeid | |||
| 64 | ||||
| 65 | AtomicExpand() : FunctionPass(ID) { | |||
| 66 | initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); | |||
| 67 | } | |||
| 68 | ||||
| 69 | bool runOnFunction(Function &F) override; | |||
| 70 | ||||
| 71 | private: | |||
| 72 | bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); | |||
| 73 | IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); | |||
| 74 | LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); | |||
| 75 | bool tryExpandAtomicLoad(LoadInst *LI); | |||
| 76 | bool expandAtomicLoadToLL(LoadInst *LI); | |||
| 77 | bool expandAtomicLoadToCmpXchg(LoadInst *LI); | |||
| 78 | StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); | |||
| 79 | bool expandAtomicStore(StoreInst *SI); | |||
| 80 | bool tryExpandAtomicRMW(AtomicRMWInst *AI); | |||
| 81 | AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); | |||
| 82 | Value * | |||
| 83 | insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, | |||
| 84 | Align AddrAlign, AtomicOrdering MemOpOrder, | |||
| 85 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); | |||
| 86 | void expandAtomicOpToLLSC( | |||
| 87 | Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, | |||
| 88 | AtomicOrdering MemOpOrder, | |||
| 89 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); | |||
| 90 | void expandPartwordAtomicRMW( | |||
| 91 | AtomicRMWInst *I, | |||
| 92 | TargetLoweringBase::AtomicExpansionKind ExpansionKind); | |||
| 93 | AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); | |||
| 94 | bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); | |||
| 95 | void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); | |||
| 96 | void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); | |||
| 97 | ||||
| 98 | AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); | |||
| 99 | static Value *insertRMWCmpXchgLoop( | |||
| 100 | IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign, | |||
| 101 | AtomicOrdering MemOpOrder, SyncScope::ID SSID, | |||
| 102 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, | |||
| 103 | CreateCmpXchgInstFun CreateCmpXchg); | |||
| 104 | bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); | |||
| 105 | ||||
| 106 | bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); | |||
| 107 | bool isIdempotentRMW(AtomicRMWInst *RMWI); | |||
| 108 | bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); | |||
| 109 | ||||
| 110 | bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, | |||
| 111 | Value *PointerOperand, Value *ValueOperand, | |||
| 112 | Value *CASExpected, AtomicOrdering Ordering, | |||
| 113 | AtomicOrdering Ordering2, | |||
| 114 | ArrayRef<RTLIB::Libcall> Libcalls); | |||
| 115 | void expandAtomicLoadToLibcall(LoadInst *LI); | |||
| 116 | void expandAtomicStoreToLibcall(StoreInst *LI); | |||
| 117 | void expandAtomicRMWToLibcall(AtomicRMWInst *I); | |||
| 118 | void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); | |||
| 119 | ||||
| 120 | friend bool | |||
| 121 | llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, | |||
| 122 | CreateCmpXchgInstFun CreateCmpXchg); | |||
| 123 | }; | |||
| 124 | ||||
| 125 | } // end anonymous namespace | |||
| 126 | ||||
| 127 | char AtomicExpand::ID = 0; | |||
| 128 | ||||
| 129 | char &llvm::AtomicExpandID = AtomicExpand::ID; | |||
| 130 | ||||
| 131 | INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", | |||
| 132 | false, false) | |||
| 133 | ||||
| 134 | FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } | |||
| 135 | ||||
| 136 | // Helper functions to retrieve the size of atomic instructions. | |||
| 137 | static unsigned getAtomicOpSize(LoadInst *LI) { | |||
| 138 | const DataLayout &DL = LI->getModule()->getDataLayout(); | |||
| 139 | return DL.getTypeStoreSize(LI->getType()); | |||
| 140 | } | |||
| 141 | ||||
| 142 | static unsigned getAtomicOpSize(StoreInst *SI) { | |||
| 143 | const DataLayout &DL = SI->getModule()->getDataLayout(); | |||
| 144 | return DL.getTypeStoreSize(SI->getValueOperand()->getType()); | |||
| 145 | } | |||
| 146 | ||||
| 147 | static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { | |||
| 148 | const DataLayout &DL = RMWI->getModule()->getDataLayout(); | |||
| 149 | return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); | |||
| 150 | } | |||
| 151 | ||||
| 152 | static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { | |||
| 153 | const DataLayout &DL = CASI->getModule()->getDataLayout(); | |||
| 154 | return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); | |||
| 155 | } | |||
| 156 | ||||
| 157 | // Determine if a particular atomic operation has a supported size, | |||
| 158 | // and is of appropriate alignment, to be passed through for target | |||
| 159 | // lowering. (Versus turning into a __atomic libcall) | |||
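| | // For instance (hypothetical numbers): with getMaxAtomicSizeInBitsSupported() | |||
| | // returning 64, an 8-byte atomic op that is only 4-byte aligned fails the | |||
| | // Alignment >= Size check below and is routed to an __atomic_* libcall instead. | |||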
| 160 | template <typename Inst> | |||
| 161 | static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { | |||
| 162 | unsigned Size = getAtomicOpSize(I); | |||
| 163 | Align Alignment = I->getAlign(); | |||
| 164 | return Alignment >= Size && | |||
| 165 | Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; | |||
| 166 | } | |||
| 167 | ||||
| 168 | bool AtomicExpand::runOnFunction(Function &F) { | |||
| 169 | auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); | |||
| 170 | if (!TPC) | |||
| 171 | return false; | |||
| 172 | ||||
| 173 | auto &TM = TPC->getTM<TargetMachine>(); | |||
| 174 | if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) | |||
| 175 | return false; | |||
| 176 | TLI = TM.getSubtargetImpl(F)->getTargetLowering(); | |||
| 177 | ||||
| 178 | SmallVector<Instruction *, 1> AtomicInsts; | |||
| 179 | ||||
| 180 | // Changing control-flow while iterating through it is a bad idea, so gather a | |||
| 181 | // list of all atomic instructions before we start. | |||
| 182 | for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { | |||
| 183 | Instruction *I = &*II; | |||
| 184 | if (I->isAtomic() && !isa<FenceInst>(I)) | |||
| 185 | AtomicInsts.push_back(I); | |||
| 186 | } | |||
| 187 | ||||
| 188 | bool MadeChange = false; | |||
| 189 | for (auto I : AtomicInsts) { | |||
| 190 | auto LI = dyn_cast<LoadInst>(I); | |||
| 191 | auto SI = dyn_cast<StoreInst>(I); | |||
| 192 | auto RMWI = dyn_cast<AtomicRMWInst>(I); | |||
| 193 | auto CASI = dyn_cast<AtomicCmpXchgInst>(I); | |||
| 194 | assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction"); | |||
| 195 | ||||
| 196 | // If the Size/Alignment is not supported, replace with a libcall. | |||
| 197 | if (LI) { | |||
| 198 | if (!atomicSizeSupported(TLI, LI)) { | |||
| 199 | expandAtomicLoadToLibcall(LI); | |||
| 200 | MadeChange = true; | |||
| 201 | continue; | |||
| 202 | } | |||
| 203 | } else if (SI) { | |||
| 204 | if (!atomicSizeSupported(TLI, SI)) { | |||
| 205 | expandAtomicStoreToLibcall(SI); | |||
| 206 | MadeChange = true; | |||
| 207 | continue; | |||
| 208 | } | |||
| 209 | } else if (RMWI) { | |||
| 210 | if (!atomicSizeSupported(TLI, RMWI)) { | |||
| 211 | expandAtomicRMWToLibcall(RMWI); | |||
| 212 | MadeChange = true; | |||
| 213 | continue; | |||
| 214 | } | |||
| 215 | } else if (CASI) { | |||
| 216 | if (!atomicSizeSupported(TLI, CASI)) { | |||
| 217 | expandAtomicCASToLibcall(CASI); | |||
| 218 | MadeChange = true; | |||
| 219 | continue; | |||
| 220 | } | |||
| 221 | } | |||
| 222 | ||||
| 223 | if (TLI->shouldInsertFencesForAtomic(I)) { | |||
| 224 | auto FenceOrdering = AtomicOrdering::Monotonic; | |||
| 225 | if (LI && isAcquireOrStronger(LI->getOrdering())) { | |||
| 226 | FenceOrdering = LI->getOrdering(); | |||
| 227 | LI->setOrdering(AtomicOrdering::Monotonic); | |||
| 228 | } else if (SI && isReleaseOrStronger(SI->getOrdering())) { | |||
| 229 | FenceOrdering = SI->getOrdering(); | |||
| 230 | SI->setOrdering(AtomicOrdering::Monotonic); | |||
| 231 | } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || | |||
| 232 | isAcquireOrStronger(RMWI->getOrdering()))) { | |||
| 233 | FenceOrdering = RMWI->getOrdering(); | |||
| 234 | RMWI->setOrdering(AtomicOrdering::Monotonic); | |||
| 235 | } else if (CASI && | |||
| 236 | TLI->shouldExpandAtomicCmpXchgInIR(CASI) == | |||
| 237 | TargetLoweringBase::AtomicExpansionKind::None && | |||
| 238 | (isReleaseOrStronger(CASI->getSuccessOrdering()) || | |||
| 239 | isAcquireOrStronger(CASI->getSuccessOrdering()) || | |||
| 240 | isAcquireOrStronger(CASI->getFailureOrdering()))) { | |||
| 241 | // If a compare and swap is lowered to LL/SC, we can do smarter fence | |||
| 242 | // insertion, with a stronger one on the success path than on the | |||
| 243 | // failure path. As a result, fence insertion is directly done by | |||
| 244 | // expandAtomicCmpXchg in that case. | |||
| 245 | FenceOrdering = CASI->getMergedOrdering(); | |||
| 246 | CASI->setSuccessOrdering(AtomicOrdering::Monotonic); | |||
| 247 | CASI->setFailureOrdering(AtomicOrdering::Monotonic); | |||
| 248 | } | |||
| 249 | ||||
| 250 | if (FenceOrdering != AtomicOrdering::Monotonic) { | |||
| 251 | MadeChange |= bracketInstWithFences(I, FenceOrdering); | |||
| 252 | } | |||
| 253 | } | |||
| 254 | ||||
| 255 | if (LI) { | |||
| 256 | if (LI->getType()->isFloatingPointTy()) { | |||
| 257 | // TODO: add a TLI hook to control this so that each target can | |||
| 258 | // convert to lowering the original type one at a time. | |||
| 259 | LI = convertAtomicLoadToIntegerType(LI); | |||
| 260 | assert(LI->getType()->isIntegerTy() && "invariant broken"); | |||
| 261 | MadeChange = true; | |||
| 262 | } | |||
| 263 | ||||
| 264 | MadeChange |= tryExpandAtomicLoad(LI); | |||
| 265 | } else if (SI) { | |||
| 266 | if (SI->getValueOperand()->getType()->isFloatingPointTy()) { | |||
| 267 | // TODO: add a TLI hook to control this so that each target can | |||
| 268 | // convert to lowering the original type one at a time. | |||
| 269 | SI = convertAtomicStoreToIntegerType(SI); | |||
| 270 | assert(SI->getValueOperand()->getType()->isIntegerTy() && | |||
| 271 | "invariant broken"); | |||
| 272 | MadeChange = true; | |||
| 273 | } | |||
| 274 | ||||
| 275 | if (TLI->shouldExpandAtomicStoreInIR(SI)) | |||
| 276 | MadeChange |= expandAtomicStore(SI); | |||
| 277 | } else if (RMWI) { | |||
| 278 | // There are two different ways of expanding RMW instructions: | |||
| 279 | // - into a load if it is idempotent | |||
| 280 | // - into a Cmpxchg/LL-SC loop otherwise | |||
| 281 | // we try them in that order. | |||
| 282 | ||||
| 283 | if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { | |||
| 284 | MadeChange = true; | |||
| 285 | } else { | |||
| 286 | AtomicRMWInst::BinOp Op = RMWI->getOperation(); | |||
| 287 | if (Op == AtomicRMWInst::Xchg && | |||
| 288 | RMWI->getValOperand()->getType()->isFloatingPointTy()) { | |||
| 289 | // TODO: add a TLI hook to control this so that each target can | |||
| 290 | // convert to lowering the original type one at a time. | |||
| 291 | RMWI = convertAtomicXchgToIntegerType(RMWI); | |||
| 292 | assert(RMWI->getValOperand()->getType()->isIntegerTy() && | |||
| 293 | "invariant broken"); | |||
| 294 | MadeChange = true; | |||
| 295 | } | |||
| 296 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
| 297 | unsigned ValueSize = getAtomicOpSize(RMWI); | |||
| 298 | if (ValueSize < MinCASSize && | |||
| 299 | (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || | |||
| 300 | Op == AtomicRMWInst::And)) { | |||
| 301 | RMWI = widenPartwordAtomicRMW(RMWI); | |||
| 302 | MadeChange = true; | |||
| 303 | } | |||
| 304 | ||||
| 305 | MadeChange |= tryExpandAtomicRMW(RMWI); | |||
| 306 | } | |||
| 307 | } else if (CASI) { | |||
| 308 | // TODO: when we're ready to make the change at the IR level, we can | |||
| 309 | // extend convertCmpXchgToInteger for floating point too. | |||
| 310 | assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() && | |||
| 311 | "unimplemented - floating point not legal at IR level"); | |||
| 312 | if (CASI->getCompareOperand()->getType()->isPointerTy()) { | |||
| 313 | // TODO: add a TLI hook to control this so that each target can | |||
| 314 | // convert to lowering the original type one at a time. | |||
| 315 | CASI = convertCmpXchgToIntegerType(CASI); | |||
| 316 | assert(CASI->getCompareOperand()->getType()->isIntegerTy() && | |||
| 317 | "invariant broken"); | |||
| 318 | MadeChange = true; | |||
| 319 | } | |||
| 320 | ||||
| 321 | MadeChange |= tryExpandAtomicCmpXchg(CASI); | |||
| 322 | } | |||
| 323 | } | |||
| 324 | return MadeChange; | |||
| 325 | } | |||
| 326 | ||||
| 327 | bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { | |||
| 328 | IRBuilder<> Builder(I); | |||
| 329 | ||||
| 330 | auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); | |||
| 331 | ||||
| 332 | auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); | |||
| 333 | // We have a guard here because not every atomic operation generates a | |||
| 334 | // trailing fence. | |||
| 335 | if (TrailingFence) | |||
| 336 | TrailingFence->moveAfter(I); | |||
| 337 | ||||
| 338 | return (LeadingFence || TrailingFence); | |||
| 339 | } | |||
| 340 | ||||
| 341 | /// Get the iX type with the same bitwidth as T. | |||
| 342 | IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, | |||
| 343 | const DataLayout &DL) { | |||
| 344 | EVT VT = TLI->getMemValueType(DL, T); | |||
| 345 | unsigned BitWidth = VT.getStoreSizeInBits(); | |||
| 346 | assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); | |||
| 347 | return IntegerType::get(T->getContext(), BitWidth); | |||
| 348 | } | |||
| 349 | ||||
| 350 | /// Convert an atomic load of a non-integral type to an integer load of the | |||
| 351 | /// equivalent bitwidth. See the function comment on | |||
| 352 | /// convertAtomicStoreToIntegerType for background. | |||
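| | /// A sketch of the rewrite this performs (illustrative, not verbatim output): | |||
| | ///   %v = load atomic float, float* %p acquire, align 4 | |||
| | /// becomes | |||
| | ///   %p.i = bitcast float* %p to i32* | |||
| | ///   %v.i = load atomic i32, i32* %p.i acquire, align 4 | |||
| | ///   %v   = bitcast i32 %v.i to float | |||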
| 353 | LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { | |||
| 354 | auto *M = LI->getModule(); | |||
| 355 | Type *NewTy = getCorrespondingIntegerType(LI->getType(), | |||
| 356 | M->getDataLayout()); | |||
| 357 | ||||
| 358 | IRBuilder<> Builder(LI); | |||
| 359 | ||||
| 360 | Value *Addr = LI->getPointerOperand(); | |||
| 361 | Type *PT = PointerType::get(NewTy, | |||
| 362 | Addr->getType()->getPointerAddressSpace()); | |||
| 363 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | |||
| 364 | ||||
| 365 | auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); | |||
| 366 | NewLI->setAlignment(LI->getAlign()); | |||
| 367 | NewLI->setVolatile(LI->isVolatile()); | |||
| 368 | NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); | |||
| 369 | LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); | |||
| 370 | ||||
| 371 | Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); | |||
| 372 | LI->replaceAllUsesWith(NewVal); | |||
| 373 | LI->eraseFromParent(); | |||
| 374 | return NewLI; | |||
| 375 | } | |||
| 376 | ||||
| 377 | AtomicRMWInst * | |||
| 378 | AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { | |||
| 379 | auto *M = RMWI->getModule(); | |||
| 380 | Type *NewTy = | |||
| 381 | getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); | |||
| 382 | ||||
| 383 | IRBuilder<> Builder(RMWI); | |||
| 384 | ||||
| 385 | Value *Addr = RMWI->getPointerOperand(); | |||
| 386 | Value *Val = RMWI->getValOperand(); | |||
| 387 | Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); | |||
| 388 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | |||
| 389 | Value *NewVal = Builder.CreateBitCast(Val, NewTy); | |||
| 390 | ||||
| 391 | auto *NewRMWI = | |||
| 392 | Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, | |||
| 393 | RMWI->getAlign(), RMWI->getOrdering()); | |||
| 394 | NewRMWI->setVolatile(RMWI->isVolatile()); | |||
| 395 | LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); | |||
| 396 | ||||
| 397 | Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); | |||
| 398 | RMWI->replaceAllUsesWith(NewRVal); | |||
| 399 | RMWI->eraseFromParent(); | |||
| 400 | return NewRMWI; | |||
| 401 | } | |||
| 402 | ||||
| 403 | bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { | |||
| 404 | switch (TLI->shouldExpandAtomicLoadInIR(LI)) { | |||
| 405 | case TargetLoweringBase::AtomicExpansionKind::None: | |||
| 406 | return false; | |||
| 407 | case TargetLoweringBase::AtomicExpansionKind::LLSC: | |||
| 408 | expandAtomicOpToLLSC( | |||
| 409 | LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), | |||
| 410 | LI->getOrdering(), | |||
| 411 | [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); | |||
| 412 | return true; | |||
| 413 | case TargetLoweringBase::AtomicExpansionKind::LLOnly: | |||
| 414 | return expandAtomicLoadToLL(LI); | |||
| 415 | case TargetLoweringBase::AtomicExpansionKind::CmpXChg: | |||
| 416 | return expandAtomicLoadToCmpXchg(LI); | |||
| 417 | default: | |||
| 418 | llvm_unreachable("Unhandled case in tryExpandAtomicLoad")__builtin_unreachable(); | |||
| 419 | } | |||
| 420 | } | |||
| 421 | ||||
| 422 | bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { | |||
| 423 | IRBuilder<> Builder(LI); | |||
| 424 | ||||
| 425 | // On some architectures, load-linked instructions are atomic for larger | |||
| 426 | // sizes than normal loads. For example, the only 64-bit load guaranteed | |||
| 427 | // to be single-copy atomic by ARM is an ldrexd (A3.5.3). | |||
| 428 | Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), | |||
| 429 | LI->getPointerOperand(), LI->getOrdering()); | |||
| 430 | TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); | |||
| 431 | ||||
| 432 | LI->replaceAllUsesWith(Val); | |||
| 433 | LI->eraseFromParent(); | |||
| 434 | ||||
| 435 | return true; | |||
| 436 | } | |||
| 437 | ||||
| 438 | bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { | |||
| 439 | IRBuilder<> Builder(LI); | |||
| 440 | AtomicOrdering Order = LI->getOrdering(); | |||
| 441 | if (Order == AtomicOrdering::Unordered) | |||
| 442 | Order = AtomicOrdering::Monotonic; | |||
| 443 | ||||
| 444 | Value *Addr = LI->getPointerOperand(); | |||
| 445 | Type *Ty = LI->getType(); | |||
| 446 | Constant *DummyVal = Constant::getNullValue(Ty); | |||
| 447 | ||||
| 448 | Value *Pair = Builder.CreateAtomicCmpXchg( | |||
| 449 | Addr, DummyVal, DummyVal, LI->getAlign(), Order, | |||
| 450 | AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); | |||
| 451 | Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); | |||
| 452 | ||||
| 453 | LI->replaceAllUsesWith(Loaded); | |||
| 454 | LI->eraseFromParent(); | |||
| 455 | ||||
| 456 | return true; | |||
| 457 | } | |||
| 458 | ||||
| 459 | /// Convert an atomic store of a non-integral type to an integer store of the | |||
| 460 | /// equivalent bitwidth. We used to not support floating point or vector | |||
| 461 | /// atomics in the IR at all. The backends learned to deal with the bitcast | |||
| 462 | /// idiom because that was the only way of expressing the notion of an atomic | |||
| 463 | /// float or vector store. The long term plan is to teach each backend to | |||
| 464 | /// instruction select from the original atomic store, but as a migration | |||
| 465 | /// mechanism, we convert back to the old format which the backends understand. | |||
| 466 | /// Each backend will need individual work to recognize the new format. | |||
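| | /// Illustrative shape of the conversion (float chosen as an example type): | |||
| | ///   store atomic float %f, float* %p release, align 4 | |||
| | /// becomes | |||
| | ///   %f.i = bitcast float %f to i32 | |||
| | ///   %p.i = bitcast float* %p to i32* | |||
| | ///   store atomic i32 %f.i, i32* %p.i release, align 4 | |||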
| 467 | StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { | |||
| 468 | IRBuilder<> Builder(SI); | |||
| 469 | auto *M = SI->getModule(); | |||
| 470 | Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), | |||
| 471 | M->getDataLayout()); | |||
| 472 | Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); | |||
| 473 | ||||
| 474 | Value *Addr = SI->getPointerOperand(); | |||
| 475 | Type *PT = PointerType::get(NewTy, | |||
| 476 | Addr->getType()->getPointerAddressSpace()); | |||
| 477 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | |||
| 478 | ||||
| 479 | StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); | |||
| 480 | NewSI->setAlignment(SI->getAlign()); | |||
| 481 | NewSI->setVolatile(SI->isVolatile()); | |||
| 482 | NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); | |||
| 483 | LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); | |||
| 484 | SI->eraseFromParent(); | |||
| 485 | return NewSI; | |||
| 486 | } | |||
| 487 | ||||
| 488 | bool AtomicExpand::expandAtomicStore(StoreInst *SI) { | |||
| 489 | // This function is only called on atomic stores that are too large to be | |||
| 490 | // atomic if implemented as a native store. So we replace them by an | |||
| 491 | // atomic swap, that can be implemented for example as a ldrex/strex on ARM | |||
| 492 | // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. | |||
| 493 | // It is the responsibility of the target to only signal expansion via | |||
| 494 | // shouldExpandAtomicRMW in cases where this is required and possible. | |||
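| | // Sketch (assuming, say, a 32-bit ARM target where a plain 64-bit store is | |||
| | // not single-copy atomic): | |||
| | //   store atomic i64 %v, i64* %p release, align 8 | |||
| | // is rewritten to | |||
| | //   atomicrmw xchg i64* %p, i64 %v release | |||
| | // with the result unused; the xchg is then expanded again below. | |||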
| 495 | IRBuilder<> Builder(SI); | |||
| 496 | AtomicRMWInst *AI = Builder.CreateAtomicRMW( | |||
| 497 | AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), | |||
| 498 | SI->getAlign(), SI->getOrdering()); | |||
| 499 | SI->eraseFromParent(); | |||
| 500 | ||||
| 501 | // Now we have an appropriate swap instruction, lower it as usual. | |||
| 502 | return tryExpandAtomicRMW(AI); | |||
| 503 | } | |||
| 504 | ||||
| 505 | static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, | |||
| 506 | Value *Loaded, Value *NewVal, Align AddrAlign, | |||
| 507 | AtomicOrdering MemOpOrder, SyncScope::ID SSID, | |||
| 508 | Value *&Success, Value *&NewLoaded) { | |||
| 509 | Type *OrigTy = NewVal->getType(); | |||
| 510 | ||||
| 511 | // This code can go away when cmpxchg supports FP types. | |||
| 512 | bool NeedBitcast = OrigTy->isFloatingPointTy(); | |||
| 513 | if (NeedBitcast) { | |||
| 514 | IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); | |||
| 515 | unsigned AS = Addr->getType()->getPointerAddressSpace(); | |||
| 516 | Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS)); | |||
| 517 | NewVal = Builder.CreateBitCast(NewVal, IntTy); | |||
| 518 | Loaded = Builder.CreateBitCast(Loaded, IntTy); | |||
| 519 | } | |||
| 520 | ||||
| 521 | Value *Pair = Builder.CreateAtomicCmpXchg( | |||
| 522 | Addr, Loaded, NewVal, AddrAlign, MemOpOrder, | |||
| 523 | AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); | |||
| 524 | Success = Builder.CreateExtractValue(Pair, 1, "success"); | |||
| 525 | NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); | |||
| 526 | ||||
| 527 | if (NeedBitcast) | |||
| 528 | NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); | |||
| 529 | } | |||
| 530 | ||||
| 531 | /// Emit IR to implement the given atomicrmw operation on values in registers, | |||
| 532 | /// returning the new value. | |||
| 533 | static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, | |||
| 534 | Value *Loaded, Value *Inc) { | |||
| 535 | Value *NewVal; | |||
| 536 | switch (Op) { | |||
| 537 | case AtomicRMWInst::Xchg: | |||
| 538 | return Inc; | |||
| 539 | case AtomicRMWInst::Add: | |||
| 540 | return Builder.CreateAdd(Loaded, Inc, "new"); | |||
| 541 | case AtomicRMWInst::Sub: | |||
| 542 | return Builder.CreateSub(Loaded, Inc, "new"); | |||
| 543 | case AtomicRMWInst::And: | |||
| 544 | return Builder.CreateAnd(Loaded, Inc, "new"); | |||
| 545 | case AtomicRMWInst::Nand: | |||
| 546 | return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); | |||
| 547 | case AtomicRMWInst::Or: | |||
| 548 | return Builder.CreateOr(Loaded, Inc, "new"); | |||
| 549 | case AtomicRMWInst::Xor: | |||
| 550 | return Builder.CreateXor(Loaded, Inc, "new"); | |||
| 551 | case AtomicRMWInst::Max: | |||
| 552 | NewVal = Builder.CreateICmpSGT(Loaded, Inc); | |||
| 553 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
| 554 | case AtomicRMWInst::Min: | |||
| 555 | NewVal = Builder.CreateICmpSLE(Loaded, Inc); | |||
| 556 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
| 557 | case AtomicRMWInst::UMax: | |||
| 558 | NewVal = Builder.CreateICmpUGT(Loaded, Inc); | |||
| 559 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
| 560 | case AtomicRMWInst::UMin: | |||
| 561 | NewVal = Builder.CreateICmpULE(Loaded, Inc); | |||
| 562 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
| 563 | case AtomicRMWInst::FAdd: | |||
| 564 | return Builder.CreateFAdd(Loaded, Inc, "new"); | |||
| 565 | case AtomicRMWInst::FSub: | |||
| 566 | return Builder.CreateFSub(Loaded, Inc, "new"); | |||
| 567 | default: | |||
| 568 | llvm_unreachable("Unknown atomic op")__builtin_unreachable(); | |||
| 569 | } | |||
| 570 | } | |||
| 571 | ||||
| 572 | bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { | |||
| 573 | switch (TLI->shouldExpandAtomicRMWInIR(AI)) { | |||
| 574 | case TargetLoweringBase::AtomicExpansionKind::None: | |||
| 575 | return false; | |||
| 576 | case TargetLoweringBase::AtomicExpansionKind::LLSC: { | |||
| 577 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
| 578 | unsigned ValueSize = getAtomicOpSize(AI); | |||
| 579 | if (ValueSize < MinCASSize) { | |||
| 580 | expandPartwordAtomicRMW(AI, | |||
| 581 | TargetLoweringBase::AtomicExpansionKind::LLSC); | |||
| 582 | } else { | |||
| 583 | auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { | |||
| 584 | return performAtomicOp(AI->getOperation(), Builder, Loaded, | |||
| 585 | AI->getValOperand()); | |||
| 586 | }; | |||
| 587 | expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), | |||
| 588 | AI->getAlign(), AI->getOrdering(), PerformOp); | |||
| 589 | } | |||
| 590 | return true; | |||
| 591 | } | |||
| 592 | case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { | |||
| 593 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
| 594 | unsigned ValueSize = getAtomicOpSize(AI); | |||
| 595 | if (ValueSize < MinCASSize) { | |||
| 596 | // TODO: Handle atomicrmw fadd/fsub | |||
| 597 | if (AI->getType()->isFloatingPointTy()) | |||
| 598 | return false; | |||
| 599 | ||||
| 600 | expandPartwordAtomicRMW(AI, | |||
| 601 | TargetLoweringBase::AtomicExpansionKind::CmpXChg); | |||
| 602 | } else { | |||
| 603 | expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); | |||
| 604 | } | |||
| 605 | return true; | |||
| 606 | } | |||
| 607 | case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { | |||
| 608 | expandAtomicRMWToMaskedIntrinsic(AI); | |||
| 609 | return true; | |||
| 610 | } | |||
| 611 | default: | |||
| 612 | llvm_unreachable("Unhandled case in tryExpandAtomicRMW")__builtin_unreachable(); | |||
| 613 | } | |||
| 614 | } | |||
| 615 | ||||
| 616 | namespace { | |||
| 617 | ||||
| 618 | struct PartwordMaskValues { | |||
| 619 | // These three fields are guaranteed to be set by createMaskInstrs. | |||
| 620 | Type *WordType = nullptr; | |||
| 621 | Type *ValueType = nullptr; | |||
| 622 | Value *AlignedAddr = nullptr; | |||
| 623 | Align AlignedAddrAlignment; | |||
| 624 | // The remaining fields can be null. | |||
| 625 | Value *ShiftAmt = nullptr; | |||
| 626 | Value *Mask = nullptr; | |||
| 627 | Value *Inv_Mask = nullptr; | |||
| 628 | }; | |||
| 629 | ||||
| 630 | LLVM_ATTRIBUTE_UNUSED | |||
| 631 | raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { | |||
| 632 | auto PrintObj = [&O](auto *V) { | |||
| 633 | if (V) | |||
| 634 | O << *V; | |||
| 635 | else | |||
| 636 | O << "nullptr"; | |||
| 637 | O << '\n'; | |||
| 638 | }; | |||
| 639 | O << "PartwordMaskValues {\n"; | |||
| 640 | O << " WordType: "; | |||
| 641 | PrintObj(PMV.WordType); | |||
| 642 | O << " ValueType: "; | |||
| 643 | PrintObj(PMV.ValueType); | |||
| 644 | O << " AlignedAddr: "; | |||
| 645 | PrintObj(PMV.AlignedAddr); | |||
| 646 | O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n'; | |||
| 647 | O << " ShiftAmt: "; | |||
| 648 | PrintObj(PMV.ShiftAmt); | |||
| 649 | O << " Mask: "; | |||
| 650 | PrintObj(PMV.Mask); | |||
| 651 | O << " Inv_Mask: "; | |||
| 652 | PrintObj(PMV.Inv_Mask); | |||
| 653 | O << "}\n"; | |||
| 654 | return O; | |||
| 655 | } | |||
| 656 | ||||
| 657 | } // end anonymous namespace | |||
| 658 | ||||
| 659 | /// This is a helper function which builds instructions to provide | |||
| 660 | /// values necessary for partword atomic operations. It takes an | |||
| 661 | /// incoming address, Addr, and ValueType, and constructs the address, | |||
| 662 | /// shift-amounts and masks needed to work with a larger value of size | |||
| 663 | /// WordSize. | |||
| 664 | /// | |||
| 665 | /// AlignedAddr: Addr rounded down to a multiple of WordSize | |||
| 666 | /// | |||
| 667 | /// ShiftAmt: Number of bits to right-shift a WordSize value loaded | |||
| 668 | /// from AlignAddr for it to have the same value as if | |||
| 669 | /// ValueType was loaded from Addr. | |||
| 670 | /// | |||
| 671 | /// Mask: Value to mask with the value loaded from AlignAddr to | |||
| 672 | /// include only the part that would've been loaded from Addr. | |||
| 673 | /// | |||
| 674 | /// Inv_Mask: The inverse of Mask. | |||
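| | /// Worked example (hypothetical values): for an i8 at an address with | |||
| | /// Addr % 4 == 3, MinWordSize == 4 and a little-endian layout: | |||
| | ///   AlignedAddr = Addr & ~3 | |||
| | ///   ShiftAmt    = 3 * 8 = 24 | |||
| | ///   Mask        = 0xFF << 24 = 0xFF000000 | |||
| | ///   Inv_Mask    = 0x00FFFFFF | |||
| | /// On a big-endian layout the byte order is mirrored, so ShiftAmt would be | |||
| | /// (3 ^ 3) * 8 = 0 instead. | |||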
| 675 | static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, | |||
| 676 | Type *ValueType, Value *Addr, | |||
| 677 | Align AddrAlign, | |||
| 678 | unsigned MinWordSize) { | |||
| 679 | PartwordMaskValues PMV; | |||
| 680 | ||||
| 681 | Module *M = I->getModule(); | |||
| 682 | LLVMContext &Ctx = M->getContext(); | |||
| 683 | const DataLayout &DL = M->getDataLayout(); | |||
| 684 | unsigned ValueSize = DL.getTypeStoreSize(ValueType); | |||
| 685 | ||||
| 686 | PMV.ValueType = ValueType; | |||
| 687 | PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) | |||
| 688 | : ValueType; | |||
| 689 | if (PMV.ValueType == PMV.WordType) { | |||
| 690 | PMV.AlignedAddr = Addr; | |||
| 691 | PMV.AlignedAddrAlignment = AddrAlign; | |||
| 692 | PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); | |||
| 693 | PMV.Mask = ConstantInt::get(PMV.ValueType, ~0); | |||
| 694 | return PMV; | |||
| 695 | } | |||
| 696 | ||||
| 697 | assert(ValueSize < MinWordSize); | |||
| 698 | ||||
| 699 | Type *WordPtrType = | |||
| 700 | PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); | |||
| 701 | ||||
| 702 | // TODO: we could skip some of this if AddrAlign >= MinWordSize. | |||
| 703 | Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); | |||
| 704 | PMV.AlignedAddr = Builder.CreateIntToPtr( | |||
| 705 | Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, | |||
| 706 | "AlignedAddr"); | |||
| 707 | PMV.AlignedAddrAlignment = Align(MinWordSize); | |||
| 708 | ||||
| 709 | Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); | |||
| 710 | if (DL.isLittleEndian()) { | |||
| 711 | // turn bytes into bits | |||
| 712 | PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); | |||
| 713 | } else { | |||
| 714 | // turn bytes into bits, and count from the other side. | |||
| 715 | PMV.ShiftAmt = Builder.CreateShl( | |||
| 716 | Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); | |||
| 717 | } | |||
| 718 | ||||
| 719 | PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); | |||
| 720 | PMV.Mask = Builder.CreateShl( | |||
| 721 | ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, | |||
| 722 | "Mask"); | |||
| 723 | PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); | |||
| 724 | return PMV; | |||
| 725 | } | |||
| 726 | ||||
| 727 | static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, | |||
| 728 | const PartwordMaskValues &PMV) { | |||
| 729 | assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); | |||
| 730 | if (PMV.WordType == PMV.ValueType) | |||
| 731 | return WideWord; | |||
| 732 | ||||
| 733 | Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); | |||
| 734 | Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); | |||
| 735 | return Trunc; | |||
| 736 | } | |||
| 737 | ||||
| 738 | static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, | |||
| 739 | Value *Updated, const PartwordMaskValues &PMV) { | |||
| 741 | assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); | |||
| 742 | assert(Updated->getType() == PMV.ValueType && "Value type mismatch"); | |||
| 742 | if (PMV.WordType == PMV.ValueType) | |||
| 743 | return Updated; | |||
| 744 | ||||
| 745 | Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); | |||
| 746 | Value *Shift = | |||
| 747 | Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); | |||
| 748 | Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); | |||
| 749 | Value *Or = Builder.CreateOr(And, Shift, "inserted"); | |||
| 750 | return Or; | |||
| 751 | } | |||
| 752 | ||||
| 753 | /// Emit IR to implement a masked version of a given atomicrmw | |||
| 754 | /// operation. (That is, only the bits under the Mask should be | |||
| 755 | /// affected by the operation) | |||
| 756 | static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, | |||
| 757 | IRBuilder<> &Builder, Value *Loaded, | |||
| 758 | Value *Shifted_Inc, Value *Inc, | |||
| 759 | const PartwordMaskValues &PMV) { | |||
| 760 | // TODO: update to use | |||
| 761 | // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order | |||
| 762 | // to merge bits from two values without requiring PMV.Inv_Mask. | |||
| 763 | switch (Op) { | |||
| 764 | case AtomicRMWInst::Xchg: { | |||
| 765 | Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); | |||
| 766 | Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc); | |||
| 767 | return FinalVal; | |||
| 768 | } | |||
| 769 | case AtomicRMWInst::Or: | |||
| 770 | case AtomicRMWInst::Xor: | |||
| 771 | case AtomicRMWInst::And: | |||
| 772 | llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW")__builtin_unreachable(); | |||
| 773 | case AtomicRMWInst::Add: | |||
| 774 | case AtomicRMWInst::Sub: | |||
| 775 | case AtomicRMWInst::Nand: { | |||
| 776 | // The other arithmetic ops need to be masked into place. | |||
| 777 | Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc); | |||
| 778 | Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask); | |||
| 779 | Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); | |||
| 780 | Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked); | |||
| 781 | return FinalVal; | |||
| 782 | } | |||
| 783 | case AtomicRMWInst::Max: | |||
| 784 | case AtomicRMWInst::Min: | |||
| 785 | case AtomicRMWInst::UMax: | |||
| 786 | case AtomicRMWInst::UMin: { | |||
| 787 | // Finally, comparison ops will operate on the full value, so | |||
| 788 | // truncate down to the original size, and expand out again after | |||
| 789 | // doing the operation. | |||
| 790 | Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); | |||
| 791 | Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); | |||
| 792 | Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); | |||
| 793 | return FinalVal; | |||
| 794 | } | |||
| 795 | default: | |||
| 796 | llvm_unreachable("Unknown atomic op")__builtin_unreachable(); | |||
| 797 | } | |||
| 798 | } | |||
| 799 | ||||
| 800 | /// Expand a sub-word atomicrmw operation into an appropriate | |||
| 801 | /// word-sized operation. | |||
| 802 | /// | |||
| 803 | /// It will create an LL/SC or cmpxchg loop, as appropriate, the same | |||
| 804 | /// way as a typical atomicrmw expansion. The only difference here is | |||
| 805 | /// that the operation inside of the loop may operate upon only a | |||
| 806 | /// part of the value. | |||
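| | /// Rough example (assuming an 8-bit operand and a 32-bit minimum cmpxchg | |||
| | /// width): for | |||
| | ///   %old = atomicrmw add i8* %p, i8 %v monotonic | |||
| | /// the operand is widened and shifted into its byte of the containing word, | |||
| | ///   %v.ext     = zext i8 %v to i32 | |||
| | ///   %v.shifted = shl i32 %v.ext, %ShiftAmt | |||
| | /// the loop below then applies the add within that word via | |||
| | /// performMaskedAtomicOp, and the i8 result is recovered afterwards with | |||
| | /// extractMaskedValue (lshr by %ShiftAmt, trunc to i8). | |||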
| 807 | void AtomicExpand::expandPartwordAtomicRMW( | |||
| 808 | AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { | |||
| 809 | AtomicOrdering MemOpOrder = AI->getOrdering(); | |||
| 810 | SyncScope::ID SSID = AI->getSyncScopeID(); | |||
| 811 | ||||
| 812 | IRBuilder<> Builder(AI); | |||
| 813 | ||||
| 814 | PartwordMaskValues PMV = | |||
| 815 | createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), | |||
| 816 | AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | |||
| 817 | ||||
| 818 | Value *ValOperand_Shifted = | |||
| 819 | Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), | |||
| 820 | PMV.ShiftAmt, "ValOperand_Shifted"); | |||
| 821 | ||||
| 822 | auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) { | |||
| 823 | return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded, | |||
| 824 | ValOperand_Shifted, AI->getValOperand(), PMV); | |||
| 825 | }; | |||
| 826 | ||||
| 827 | Value *OldResult; | |||
| 828 | if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { | |||
| 829 | OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, | |||
| 830 | PMV.AlignedAddrAlignment, MemOpOrder, | |||
| 831 | SSID, PerformPartwordOp, | |||
| 832 | createCmpXchgInstFun); | |||
| 833 | } else { | |||
| 834 | assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); | |||
| 835 | OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, | |||
| 836 | PMV.AlignedAddrAlignment, MemOpOrder, | |||
| 837 | PerformPartwordOp); | |||
| 838 | } | |||
| 839 | ||||
| 840 | Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); | |||
| 841 | AI->replaceAllUsesWith(FinalOldResult); | |||
| 842 | AI->eraseFromParent(); | |||
| 843 | } | |||
| 844 | ||||
| 845 | // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. | |||
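| | // For example (illustrative, assuming a 32-bit minimum width and ShiftAmt 24): | |||
| | //   atomicrmw or i8* %p, i8 1  ->  atomicrmw or i32* %AlignedAddr, i32 0x01000000 | |||
| | // For 'and', the shifted operand is first OR'd with Inv_Mask so the untouched | |||
| | // bytes are AND'ed with all-ones and therefore preserved. | |||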
| 846 | AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { | |||
| 847 | IRBuilder<> Builder(AI); | |||
| 848 | AtomicRMWInst::BinOp Op = AI->getOperation(); | |||
| 849 | ||||
| 850 | assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || | |||
| 851 | Op == AtomicRMWInst::And) && | |||
| 852 | "Unable to widen operation"); | |||
| 853 | ||||
| 854 | PartwordMaskValues PMV = | |||
| 855 | createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), | |||
| 856 | AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | |||
| 857 | ||||
| 858 | Value *ValOperand_Shifted = | |||
| 859 | Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), | |||
| 860 | PMV.ShiftAmt, "ValOperand_Shifted"); | |||
| 861 | ||||
| 862 | Value *NewOperand; | |||
| 863 | ||||
| 864 | if (Op == AtomicRMWInst::And) | |||
| 865 | NewOperand = | |||
| 866 | Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand"); | |||
| 867 | else | |||
| 868 | NewOperand = ValOperand_Shifted; | |||
| 869 | ||||
| 870 | AtomicRMWInst *NewAI = | |||
| 871 | Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, | |||
| 872 | PMV.AlignedAddrAlignment, AI->getOrdering()); | |||
| 873 | ||||
| 874 | Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); | |||
| 875 | AI->replaceAllUsesWith(FinalOldResult); | |||
| 876 | AI->eraseFromParent(); | |||
| 877 | return NewAI; | |||
| 878 | } | |||
| 879 | ||||
| 880 | bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { | |||
| 881 | // The basic idea here is that we're expanding a cmpxchg of a | |||
| 882 | // smaller memory size up to a word-sized cmpxchg. To do this, we | |||
| 883 | // need to add a retry-loop for strong cmpxchg, so that | |||
| 884 | // modifications to other parts of the word don't cause a spurious | |||
| 885 | // failure. | |||
| 886 | ||||
| 887 | // This generates code like the following: | |||
| 888 | // [[Setup mask values PMV.*]] | |||
| 889 | // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt | |||
| 890 | // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt | |||
| 891 | // %InitLoaded = load i32* %addr | |||
| 892 | // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask | |||
| 893 | // br partword.cmpxchg.loop | |||
| 894 | // partword.cmpxchg.loop: | |||
| 895 | // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ], | |||
| 896 | // [ %OldVal_MaskOut, %partword.cmpxchg.failure ] | |||
| 897 | // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted | |||
| 898 | // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted | |||
| 899 | // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp, | |||
| 900 | // i32 %FullWord_NewVal success_ordering failure_ordering | |||
| 901 | // %OldVal = extractvalue { i32, i1 } %NewCI, 0 | |||
| 902 | // %Success = extractvalue { i32, i1 } %NewCI, 1 | |||
| 903 | // br i1 %Success, label %partword.cmpxchg.end, | |||
| 904 | // label %partword.cmpxchg.failure | |||
| 905 | // partword.cmpxchg.failure: | |||
| 906 | // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask | |||
| 907 | // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut | |||
| 908 | // br i1 %ShouldContinue, label %partword.cmpxchg.loop, | |||
| 909 | // label %partword.cmpxchg.end | |||
| 910 | // partword.cmpxchg.end: | |||
| 911 | // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt | |||
| 912 | // %FinalOldVal = trunc i32 %tmp1 to i8 | |||
| 913 | // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0 | |||
| 914 | // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1 | |||
| 915 | ||||
| 916 | Value *Addr = CI->getPointerOperand(); | |||
| 917 | Value *Cmp = CI->getCompareOperand(); | |||
| 918 | Value *NewVal = CI->getNewValOperand(); | |||
| 919 | ||||
| 920 | BasicBlock *BB = CI->getParent(); | |||
| 921 | Function *F = BB->getParent(); | |||
| 922 | IRBuilder<> Builder(CI); | |||
| 923 | LLVMContext &Ctx = Builder.getContext(); | |||
| 924 | ||||
| 925 | BasicBlock *EndBB = | |||
| 926 | BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end"); | |||
| 927 | auto FailureBB = | |||
| 928 | BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB); | |||
| 929 | auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB); | |||
| 930 | ||||
| 931 | // The split call above "helpfully" added a branch at the end of BB | |||
| 932 | // (to the wrong place). | |||
| 933 | std::prev(BB->end())->eraseFromParent(); | |||
| 934 | Builder.SetInsertPoint(BB); | |||
| 935 | ||||
| 936 | PartwordMaskValues PMV = | |||
| 937 | createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, | |||
| 938 | CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | |||
| 939 | ||||
| 940 | // Shift the incoming values over, into the right location in the word. | |||
| 941 | Value *NewVal_Shifted = | |||
| 942 | Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); | |||
| 943 | Value *Cmp_Shifted = | |||
| 944 | Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt); | |||
| 945 | ||||
| 946 | // Load the entire current word, and mask into place the expected and new | |||
| 947 | // values | |||
| 948 | LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr); | |||
| 949 | InitLoaded->setVolatile(CI->isVolatile()); | |||
| 950 | Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask); | |||
| 951 | Builder.CreateBr(LoopBB); | |||
| 952 | ||||
| 953 | // partword.cmpxchg.loop: | |||
| 954 | Builder.SetInsertPoint(LoopBB); | |||
| 955 | PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2); | |||
| 956 | Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB); | |||
| 957 | ||||
| 958 | // Mask/Or the expected and new values into place in the loaded word. | |||
| 959 | Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted); | |||
| 960 | Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); | |||
| 961 | AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( | |||
| 962 | PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment, | |||
| 963 | CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID()); | |||
| 964 | NewCI->setVolatile(CI->isVolatile()); | |||
| 965 | // When we're building a strong cmpxchg, we need a loop, so you | |||
| 966 | // might think we could use a weak cmpxchg inside. But, using strong | |||
| 967 | // allows the below comparison for ShouldContinue, and we're | |||
| 968 | // expecting the underlying cmpxchg to be a machine instruction, | |||
| 969 | // which is strong anyways. | |||
| 970 | NewCI->setWeak(CI->isWeak()); | |||
| 971 | ||||
| 972 | Value *OldVal = Builder.CreateExtractValue(NewCI, 0); | |||
| 973 | Value *Success = Builder.CreateExtractValue(NewCI, 1); | |||
| 974 | ||||
| 975 | if (CI->isWeak()) | |||
| 976 | Builder.CreateBr(EndBB); | |||
| 977 | else | |||
| 978 | Builder.CreateCondBr(Success, EndBB, FailureBB); | |||
| 979 | ||||
| 980 | // partword.cmpxchg.failure: | |||
| 981 | Builder.SetInsertPoint(FailureBB); | |||
| 982 | // Upon failure, verify that the masked-out part of the loaded value | |||
| 983 | // has been modified. If it didn't, abort the cmpxchg, since the | |||
| 984 | // masked-in part must've. | |||
| 985 | Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask); | |||
| 986 | Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut); | |||
| 987 | Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB); | |||
| 988 | ||||
| 989 | // Add the second value to the phi from above | |||
| 990 | Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB); | |||
| 991 | ||||
| 992 | // partword.cmpxchg.end: | |||
| 993 | Builder.SetInsertPoint(CI); | |||
| 994 | ||||
| 995 | Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); | |||
| 996 | Value *Res = UndefValue::get(CI->getType()); | |||
| 997 | Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); | |||
| 998 | Res = Builder.CreateInsertValue(Res, Success, 1); | |||
| 999 | ||||
| 1000 | CI->replaceAllUsesWith(Res); | |||
| 1001 | CI->eraseFromParent(); | |||
| 1002 | return true; | |||
| 1003 | } | |||
| 1004 | ||||
| 1005 | void AtomicExpand::expandAtomicOpToLLSC( | |||
| 1006 | Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, | |||
| 1007 | AtomicOrdering MemOpOrder, | |||
| 1008 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { | |||
| 1009 | IRBuilder<> Builder(I); | |||
| 1010 | Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, | |||
| 1011 | MemOpOrder, PerformOp); | |||
| 1012 | ||||
| 1013 | I->replaceAllUsesWith(Loaded); | |||
| 1014 | I->eraseFromParent(); | |||
| 1015 | } | |||
| 1016 | ||||
| 1017 | void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { | |||
| 1018 | IRBuilder<> Builder(AI); | |||
| 1019 | ||||
| 1020 | PartwordMaskValues PMV = | |||
| 1021 | createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), | |||
| 1022 | AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | |||
| 1023 | ||||
| 1024 | // The value operand must be sign-extended for signed min/max so that the | |||
| 1025 | // target's signed comparison instructions can be used. Otherwise, just | |||
| 1026 | // zero-ext. | |||
| 1027 | Instruction::CastOps CastOp = Instruction::ZExt; | |||
| 1028 | AtomicRMWInst::BinOp RMWOp = AI->getOperation(); | |||
| 1029 | if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min) | |||
| 1030 | CastOp = Instruction::SExt; | |||
| 1031 | ||||
| 1032 | Value *ValOperand_Shifted = Builder.CreateShl( | |||
| 1033 | Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType), | |||
| 1034 | PMV.ShiftAmt, "ValOperand_Shifted"); | |||
| 1035 | Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( | |||
| 1036 | Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, | |||
| 1037 | AI->getOrdering()); | |||
| 1038 | Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); | |||
| 1039 | AI->replaceAllUsesWith(FinalOldResult); | |||
| 1040 | AI->eraseFromParent(); | |||
| 1041 | } | |||
| 1042 | ||||
| 1043 | void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { | |||
| 1044 | IRBuilder<> Builder(CI); | |||
| 1045 | ||||
| 1046 | PartwordMaskValues PMV = createMaskInstrs( | |||
| 1047 | Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), | |||
| 1048 | CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | |||
| 1049 | ||||
| 1050 | Value *CmpVal_Shifted = Builder.CreateShl( | |||
| 1051 | Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, | |||
| 1052 | "CmpVal_Shifted"); | |||
| 1053 | Value *NewVal_Shifted = Builder.CreateShl( | |||
| 1054 | Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt, | |||
| 1055 | "NewVal_Shifted"); | |||
| 1056 | Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( | |||
| 1057 | Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, | |||
| 1058 | CI->getMergedOrdering()); | |||
| 1059 | Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); | |||
| 1060 | Value *Res = UndefValue::get(CI->getType()); | |||
| 1061 | Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); | |||
| 1062 | Value *Success = Builder.CreateICmpEQ( | |||
| 1063 | CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); | |||
| 1064 | Res = Builder.CreateInsertValue(Res, Success, 1); | |||
| 1065 | ||||
| 1066 | CI->replaceAllUsesWith(Res); | |||
| 1067 | CI->eraseFromParent(); | |||
| 1068 | } | |||
| 1069 | ||||
| 1070 | Value *AtomicExpand::insertRMWLLSCLoop( | |||
| 1071 | IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, | |||
| 1072 | AtomicOrdering MemOpOrder, | |||
| 1073 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { | |||
| 1074 | LLVMContext &Ctx = Builder.getContext(); | |||
| 1075 | BasicBlock *BB = Builder.GetInsertBlock(); | |||
| 1076 | Function *F = BB->getParent(); | |||
| 1077 | ||||
| 1078 | assert(AddrAlign >= | |||
| 1079 | F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) && | |||
| 1080 | "Expected at least natural alignment at this point."); | |||
| 1081 | ||||
| 1082 | // Given: atomicrmw some_op iN* %addr, iN %incr ordering | |||
| 1083 | // | |||
| 1084 | // The standard expansion we produce is: | |||
| 1085 | // [...] | |||
| 1086 | // atomicrmw.start: | |||
| 1087 | // %loaded = @load.linked(%addr) | |||
| 1088 | // %new = some_op iN %loaded, %incr | |||
| 1089 | // %stored = @store_conditional(%new, %addr) | |||
| 1090 | // %try_again = icmp i32 ne %stored, 0 | |||
| 1091 | // br i1 %try_again, label %loop, label %atomicrmw.end | |||
| 1092 | // atomicrmw.end: | |||
| 1093 | // [...] | |||
| 1094 | BasicBlock *ExitBB = | |||
| 1095 | BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); | |||
| 1096 | BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); | |||
| 1097 | ||||
| 1098 | // The split call above "helpfully" added a branch at the end of BB (to the | |||
| 1099 | // wrong place). | |||
| 1100 | std::prev(BB->end())->eraseFromParent(); | |||
| 1101 | Builder.SetInsertPoint(BB); | |||
| 1102 | Builder.CreateBr(LoopBB); | |||
| 1103 | ||||
| 1104 | // Start the main loop block now that we've taken care of the preliminaries. | |||
| 1105 | Builder.SetInsertPoint(LoopBB); | |||
| 1106 | Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder); | |||
| 1107 | ||||
| 1108 | Value *NewVal = PerformOp(Builder, Loaded); | |||
| 1109 | ||||
| 1110 | Value *StoreSuccess = | |||
| 1111 | TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); | |||
| 1112 | Value *TryAgain = Builder.CreateICmpNE( | |||
| 1113 | StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); | |||
| 1114 | Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); | |||
| 1115 | ||||
| 1116 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |||
| 1117 | return Loaded; | |||
| 1118 | } | |||
| 1119 | ||||
| 1120 | /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of | |||
| 1121 | /// the equivalent bitwidth. We used to not support pointer cmpxchg in the | |||
| 1122 | /// IR. As a migration step, we convert back to what used to be the standard | |||
| 1123 | /// way to represent a pointer cmpxchg so that we can update backends one by | |||
| 1124 | /// one. | |||
| 1125 | AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { | |||
| 1126 | auto *M = CI->getModule(); | |||
| 1127 | Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), | |||
| 1128 | M->getDataLayout()); | |||
| 1129 | ||||
| 1130 | IRBuilder<> Builder(CI); | |||
| 1131 | ||||
| 1132 | Value *Addr = CI->getPointerOperand(); | |||
| 1133 | Type *PT = PointerType::get(NewTy, | |||
| 1134 | Addr->getType()->getPointerAddressSpace()); | |||
| 1135 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | |||
| 1136 | ||||
| 1137 | Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); | |||
| 1138 | Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); | |||
| 1139 | ||||
| 1140 | auto *NewCI = Builder.CreateAtomicCmpXchg( | |||
| 1141 | NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(), | |||
| 1142 | CI->getFailureOrdering(), CI->getSyncScopeID()); | |||
| 1143 | NewCI->setVolatile(CI->isVolatile()); | |||
| 1144 | NewCI->setWeak(CI->isWeak()); | |||
| 1145 | LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); | |||
| 1146 | ||||
| 1147 | Value *OldVal = Builder.CreateExtractValue(NewCI, 0); | |||
| 1148 | Value *Succ = Builder.CreateExtractValue(NewCI, 1); | |||
| 1149 | ||||
| 1150 | OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType()); | |||
| 1151 | ||||
| 1152 | Value *Res = UndefValue::get(CI->getType()); | |||
| 1153 | Res = Builder.CreateInsertValue(Res, OldVal, 0); | |||
| 1154 | Res = Builder.CreateInsertValue(Res, Succ, 1); | |||
| 1155 | ||||
| 1156 | CI->replaceAllUsesWith(Res); | |||
| 1157 | CI->eraseFromParent(); | |||
| 1158 | return NewCI; | |||
| 1159 | } | |||
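// ---- Editorial sketch (not part of the original source) --------------------
// The transformation above expressed in plain C++: a compare-and-swap on a
// pointer performed through an equally sized integer, mirroring the
// ptrtoint/inttoptr round trip.  Node and cas_pointer_via_integer are
// hypothetical names used only for illustration.
#include <atomic>
#include <cstdint>
#include <utility>

struct Node;

std::pair<Node *, bool>
cas_pointer_via_integer(std::atomic<std::uintptr_t> &Slot, Node *Expected,
                        Node *Desired) {
  std::uintptr_t Cmp = reinterpret_cast<std::uintptr_t>(Expected); // ptrtoint %cmp
  std::uintptr_t New = reinterpret_cast<std::uintptr_t>(Desired);  // ptrtoint %new
  bool Success = Slot.compare_exchange_strong(Cmp, New);           // integer cmpxchg
  return {reinterpret_cast<Node *>(Cmp), Success};                 // inttoptr %old
}
// ---- End editorial sketch ---------------------------------------------------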
| 1160 | ||||
| 1161 | bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | |||
| 1162 | AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); | |||
| 1163 | AtomicOrdering FailureOrder = CI->getFailureOrdering(); | |||
| 1164 | Value *Addr = CI->getPointerOperand(); | |||
| 1165 | BasicBlock *BB = CI->getParent(); | |||
| 1166 | Function *F = BB->getParent(); | |||
| 1167 | LLVMContext &Ctx = F->getContext(); | |||
| 1168 | // If shouldInsertFencesForAtomic() returns true, then the target does not | |||
| 1169 | // want to deal with memory orders, and emitLeading/TrailingFence should take | |||
| 1170 | // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we | |||
| 1171 | // should preserve the ordering. | |||
| 1172 | bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); | |||
| 1173 | AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic | |||
| 1174 | ? AtomicOrdering::Monotonic | |||
| 1175 | : CI->getMergedOrdering(); | |||
| 1176 | ||||
| 1177 | // In implementations which use a barrier to achieve release semantics, we can | |||
| 1178 | // delay emitting this barrier until we know a store is actually going to be | |||
| 1179 | // attempted. The cost of this delay is that we need 2 copies of the block | |||
| 1180 | // emitting the load-linked, affecting code size. | |||
| 1181 | // | |||
| 1182 | // Ideally, this logic would be unconditional except for the minsize check | |||
| 1183 | // since in other cases the extra blocks naturally collapse down to the | |||
| 1184 | // minimal loop. Unfortunately, this puts too much stress on later | |||
| 1185 | // optimisations so we avoid emitting the extra logic in those cases too. | |||
| 1186 | bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && | |||
| 1187 | SuccessOrder != AtomicOrdering::Monotonic && | |||
| 1188 | SuccessOrder != AtomicOrdering::Acquire && | |||
| 1189 | !F->hasMinSize(); | |||
| 1190 | ||||
| 1191 | // There's no overhead for sinking the release barrier in a weak cmpxchg, so | |||
| 1192 | // do it even on minsize. | |||
| 1193 | bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); | |||
| 1194 | ||||
| 1195 | // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord | |||
| 1196 | // | |||
| 1197 | // The full expansion we produce is: | |||
| 1198 | // [...] | |||
| 1199 | // %aligned.addr = ... | |||
| 1200 | // cmpxchg.start: | |||
| 1201 | // %unreleasedload = @load.linked(%aligned.addr) | |||
| 1202 | // %unreleasedload.extract = extract value from %unreleasedload | |||
| 1203 | // %should_store = icmp eq %unreleasedload.extract, %desired | |||
| 1204 | // br i1 %should_store, label %cmpxchg.releasingstore, | |||
| 1205 | // label %cmpxchg.nostore | |||
| 1206 | // cmpxchg.releasingstore: | |||
| 1207 | // fence? | |||
| 1208 | // br label cmpxchg.trystore | |||
| 1209 | // cmpxchg.trystore: | |||
| 1210 | // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], | |||
| 1211 | // [%releasedload, %cmpxchg.releasedload] | |||
| 1212 | // %updated.new = insert %new into %loaded.trystore | |||
| 1213 | // %stored = @store_conditional(%updated.new, %aligned.addr) | |||
| 1214 | // %success = icmp eq i32 %stored, 0 | |||
| 1215 | // br i1 %success, label %cmpxchg.success, | |||
| 1216 | // label %cmpxchg.releasedload/%cmpxchg.failure | |||
| 1217 | // cmpxchg.releasedload: | |||
| 1218 | // %releasedload = @load.linked(%aligned.addr) | |||
| 1219 | // %releasedload.extract = extract value from %releasedload | |||
| 1220 | // %should_store = icmp eq %releasedload.extract, %desired | |||
| 1221 | // br i1 %should_store, label %cmpxchg.trystore, | |||
| 1222 | // label %cmpxchg.failure | |||
| 1223 | // cmpxchg.success: | |||
| 1224 | // fence? | |||
| 1225 | // br label %cmpxchg.end | |||
| 1226 | // cmpxchg.nostore: | |||
| 1227 | // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start], | |||
| 1228 | // [%releasedload, | |||
| 1229 | // %cmpxchg.releasedload/%cmpxchg.trystore] | |||
| 1230 | // @load_linked_fail_balance()? | |||
| 1231 | // br label %cmpxchg.failure | |||
| 1232 | // cmpxchg.failure: | |||
| 1233 | // fence? | |||
| 1234 | // br label %cmpxchg.end | |||
| 1235 | // cmpxchg.end: | |||
| 1236 | // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], | |||
| 1237 | // [%loaded.trystore, %cmpxchg.trystore] | |||
| 1238 | // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] | |||
| 1239 | // %loaded = extract value from %loaded.exit | |||
| 1240 | // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 | |||
| 1241 | // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 | |||
| 1242 | // [...] | |||
| 1243 | BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); | |||
| 1244 | auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); | |||
| 1245 | auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); | |||
| 1246 | auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); | |||
| 1247 | auto ReleasedLoadBB = | |||
| 1248 | BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB); | |||
| 1249 | auto TryStoreBB = | |||
| 1250 | BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB); | |||
| 1251 | auto ReleasingStoreBB = | |||
| 1252 | BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); | |||
| 1253 | auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB); | |||
| 1254 | ||||
| 1255 | // This grabs the DebugLoc from CI | |||
| 1256 | IRBuilder<> Builder(CI); | |||
| 1257 | ||||
| 1258 | // The split call above "helpfully" added a branch at the end of BB (to the | |||
| 1259 | // wrong place), but we might want a fence too. It's easiest to just remove | |||
| 1260 | // the branch entirely. | |||
| 1261 | std::prev(BB->end())->eraseFromParent(); | |||
| 1262 | Builder.SetInsertPoint(BB); | |||
| 1263 | if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) | |||
| 1264 | TLI->emitLeadingFence(Builder, CI, SuccessOrder); | |||
| 1265 | ||||
| 1266 | PartwordMaskValues PMV = | |||
| 1267 | createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, | |||
| 1268 | CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | |||
| 1269 | Builder.CreateBr(StartBB); | |||
| 1270 | ||||
| 1271 | // Start the main loop block now that we've taken care of the preliminaries. | |||
| 1272 | Builder.SetInsertPoint(StartBB); | |||
| 1273 | Value *UnreleasedLoad = | |||
| 1274 | TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); | |||
| 1275 | Value *UnreleasedLoadExtract = | |||
| 1276 | extractMaskedValue(Builder, UnreleasedLoad, PMV); | |||
| 1277 | Value *ShouldStore = Builder.CreateICmpEQ( | |||
| 1278 | UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); | |||
| 1279 | ||||
| 1280 | // If the cmpxchg doesn't actually need any ordering when it fails, we can | |||
| 1281 | // jump straight past that fence instruction (if it exists). | |||
| 1282 | Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); | |||
| 1283 | ||||
| 1284 | Builder.SetInsertPoint(ReleasingStoreBB); | |||
| 1285 | if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) | |||
| 1286 | TLI->emitLeadingFence(Builder, CI, SuccessOrder); | |||
| 1287 | Builder.CreateBr(TryStoreBB); | |||
| 1288 | ||||
| 1289 | Builder.SetInsertPoint(TryStoreBB); | |||
| 1290 | PHINode *LoadedTryStore = | |||
| 1291 | Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); | |||
| 1292 | LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); | |||
| 1293 | Value *NewValueInsert = | |||
| 1294 | insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); | |||
| 1295 | Value *StoreSuccess = | |||
| 1296 | TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, | |||
| 1297 | MemOpOrder); | |||
| 1298 | StoreSuccess = Builder.CreateICmpEQ( | |||
| 1299 | StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); | |||
| 1300 | BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; | |||
| 1301 | Builder.CreateCondBr(StoreSuccess, SuccessBB, | |||
| 1302 | CI->isWeak() ? FailureBB : RetryBB); | |||
| 1303 | ||||
| 1304 | Builder.SetInsertPoint(ReleasedLoadBB); | |||
| 1305 | Value *SecondLoad; | |||
| 1306 | if (HasReleasedLoadBB) { | |||
| 1307 | SecondLoad = | |||
| 1308 | TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); | |||
| 1309 | Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); | |||
| 1310 | ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, | |||
| 1311 | CI->getCompareOperand(), "should_store"); | |||
| 1312 | ||||
| 1313 | // If the cmpxchg doesn't actually need any ordering when it fails, we can | |||
| 1314 | // jump straight past that fence instruction (if it exists). | |||
| 1315 | Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); | |||
| 1316 | // Update PHI node in TryStoreBB. | |||
| 1317 | LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); | |||
| 1318 | } else | |||
| 1319 | Builder.CreateUnreachable(); | |||
| 1320 | ||||
| 1321 | // Make sure later instructions don't get reordered with a fence if | |||
| 1322 | // necessary. | |||
| 1323 | Builder.SetInsertPoint(SuccessBB); | |||
| 1324 | if (ShouldInsertFencesForAtomic) | |||
| 1325 | TLI->emitTrailingFence(Builder, CI, SuccessOrder); | |||
| 1326 | Builder.CreateBr(ExitBB); | |||
| 1327 | ||||
| 1328 | Builder.SetInsertPoint(NoStoreBB); | |||
| 1329 | PHINode *LoadedNoStore = | |||
| 1330 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); | |||
| 1331 | LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); | |||
| 1332 | if (HasReleasedLoadBB) | |||
| 1333 | LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); | |||
| 1334 | ||||
| 1335 | // In the failing case, where we don't execute the store-conditional, the | |||
| 1336 | // target might want to balance out the load-linked with a dedicated | |||
| 1337 | // instruction (e.g., on ARM, clearing the exclusive monitor). | |||
| 1338 | TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); | |||
| 1339 | Builder.CreateBr(FailureBB); | |||
| 1340 | ||||
| 1341 | Builder.SetInsertPoint(FailureBB); | |||
| 1342 | PHINode *LoadedFailure = | |||
| 1343 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); | |||
| 1344 | LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); | |||
| 1345 | if (CI->isWeak()) | |||
| 1346 | LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); | |||
| 1347 | if (ShouldInsertFencesForAtomic) | |||
| 1348 | TLI->emitTrailingFence(Builder, CI, FailureOrder); | |||
| 1349 | Builder.CreateBr(ExitBB); | |||
| 1350 | ||||
| 1351 | // Finally, we have control-flow based knowledge of whether the cmpxchg | |||
| 1352 | // succeeded or not. We expose this to later passes by converting any | |||
| 1353 | // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate | |||
| 1354 | // PHI. | |||
| 1355 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |||
| 1356 | PHINode *LoadedExit = | |||
| 1357 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); | |||
| 1358 | LoadedExit->addIncoming(LoadedTryStore, SuccessBB); | |||
| 1359 | LoadedExit->addIncoming(LoadedFailure, FailureBB); | |||
| 1360 | PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); | |||
| 1361 | Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); | |||
| 1362 | Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); | |||
| 1363 | ||||
| 1364 | // This is the "exit value" from the cmpxchg expansion. It may be of | |||
| 1365 | // a type wider than the one in the cmpxchg instruction. | |||
| 1366 | Value *LoadedFull = LoadedExit; | |||
| 1367 | ||||
| 1368 | Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); | |||
| 1369 | Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); | |||
| 1370 | ||||
| 1371 | // Look for any users of the cmpxchg that are just comparing the loaded value | |||
| 1372 | // against the desired one, and replace them with the CFG-derived version. | |||
| 1373 | SmallVector<ExtractValueInst *, 2> PrunedInsts; | |||
| 1374 | for (auto User : CI->users()) { | |||
| 1375 | ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); | |||
| 1376 | if (!EV) | |||
| 1377 | continue; | |||
| 1378 | ||||
| 1379 | assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && | |||
| 1380 | "weird extraction from { iN, i1 }"); | |||
| 1381 | ||||
| 1382 | if (EV->getIndices()[0] == 0) | |||
| 1383 | EV->replaceAllUsesWith(Loaded); | |||
| 1384 | else | |||
| 1385 | EV->replaceAllUsesWith(Success); | |||
| 1386 | ||||
| 1387 | PrunedInsts.push_back(EV); | |||
| 1388 | } | |||
| 1389 | ||||
| 1390 | // We can remove the instructions now we're no longer iterating through them. | |||
| 1391 | for (auto EV : PrunedInsts) | |||
| 1392 | EV->eraseFromParent(); | |||
| 1393 | ||||
| 1394 | if (!CI->use_empty()) { | |||
| 1395 | // Some use of the full struct return that we don't understand has happened, | |||
| 1396 | // so we've got to reconstruct it properly. | |||
| 1397 | Value *Res; | |||
| 1398 | Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); | |||
| 1399 | Res = Builder.CreateInsertValue(Res, Success, 1); | |||
| 1400 | ||||
| 1401 | CI->replaceAllUsesWith(Res); | |||
| 1402 | } | |||
| 1403 | ||||
| 1404 | CI->eraseFromParent(); | |||
| 1405 | return true; | |||
| 1406 | } | |||
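// ---- Editorial sketch (not part of the original source) --------------------
// The { iN, i1 } result that expandAtomicCmpXchg() reconstructs from control
// flow, shown with std::atomic.  cmpxchg_pair is a hypothetical wrapper: the
// first element is the value observed in memory, the second says whether the
// exchange happened.  A strong cmpxchg fails only when the comparison fails;
// a weak one (the CI->isWeak() paths above) may also fail spuriously.
#include <atomic>
#include <utility>

template <typename T>
std::pair<T, bool> cmpxchg_pair(std::atomic<T> &Addr, T Expected, T Desired,
                                bool Weak = false) {
  T Observed = Expected;  // compare_exchange updates this with the value seen
  bool Success = Weak ? Addr.compare_exchange_weak(Observed, Desired)
                      : Addr.compare_exchange_strong(Observed, Desired);
  return {Observed, Success};  // mirrors the %loaded / %success PHIs above
}
// ---- End editorial sketch ---------------------------------------------------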
| 1407 | ||||
| 1408 | bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { | |||
| 1409 | auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); | |||
| 1410 | if(!C) | |||
| 1411 | return false; | |||
| 1412 | ||||
| 1413 | AtomicRMWInst::BinOp Op = RMWI->getOperation(); | |||
| 1414 | switch(Op) { | |||
| 1415 | case AtomicRMWInst::Add: | |||
| 1416 | case AtomicRMWInst::Sub: | |||
| 1417 | case AtomicRMWInst::Or: | |||
| 1418 | case AtomicRMWInst::Xor: | |||
| 1419 | return C->isZero(); | |||
| 1420 | case AtomicRMWInst::And: | |||
| 1421 | return C->isMinusOne(); | |||
| 1422 | // FIXME: we could also treat Min/Max/UMin/UMax as idempotent with the corresponding INT_MIN/INT_MAX/... | |||
| 1423 | default: | |||
| 1424 | return false; | |||
| 1425 | } | |||
| 1426 | } | |||
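// ---- Editorial sketch (not part of the original source) --------------------
// Compile-time spot checks of the identities isIdempotentRMW() relies on:
// add/sub/or/xor with 0 and "and" with all-ones leave the stored value
// unchanged, so the RMW can be lowered to a fence plus an ordinary load (see
// simplifyIdempotentRMW below).  SampleVal is an arbitrary example value.
#include <cstdint>

constexpr std::uint32_t SampleVal = 0xDEADBEEFu;
static_assert(SampleVal + 0u == SampleVal, "add 0 is idempotent");
static_assert(SampleVal - 0u == SampleVal, "sub 0 is idempotent");
static_assert((SampleVal | 0u) == SampleVal, "or 0 is idempotent");
static_assert((SampleVal ^ 0u) == SampleVal, "xor 0 is idempotent");
static_assert((SampleVal & ~std::uint32_t{0}) == SampleVal,
              "and all-ones is idempotent");
// ---- End editorial sketch ---------------------------------------------------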
| 1427 | ||||
| 1428 | bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { | |||
| 1429 | if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { | |||
| 1430 | tryExpandAtomicLoad(ResultingLoad); | |||
| 1431 | return true; | |||
| 1432 | } | |||
| 1433 | return false; | |||
| 1434 | } | |||
| 1435 | ||||
| 1436 | Value *AtomicExpand::insertRMWCmpXchgLoop( | |||
| 1437 | IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, | |||
| 1438 | AtomicOrdering MemOpOrder, SyncScope::ID SSID, | |||
| 1439 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, | |||
| 1440 | CreateCmpXchgInstFun CreateCmpXchg) { | |||
| 1441 | LLVMContext &Ctx = Builder.getContext(); | |||
| 1442 | BasicBlock *BB = Builder.GetInsertBlock(); | |||
| 1443 | Function *F = BB->getParent(); | |||
| 1444 | ||||
| 1445 | // Given: atomicrmw some_op iN* %addr, iN %incr ordering | |||
| 1446 | // | |||
| 1447 | // The standard expansion we produce is: | |||
| 1448 | // [...] | |||
| 1449 | // %init_loaded = load atomic iN* %addr | |||
| 1450 | // br label %loop | |||
| 1451 | // loop: | |||
| 1452 | // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] | |||
| 1453 | // %new = some_op iN %loaded, %incr | |||
| 1454 | // %pair = cmpxchg iN* %addr, iN %loaded, iN %new | |||
| 1455 | // %new_loaded = extractvalue { iN, i1 } %pair, 0 | |||
| 1456 | // %success = extractvalue { iN, i1 } %pair, 1 | |||
| 1457 | // br i1 %success, label %atomicrmw.end, label %loop | |||
| 1458 | // atomicrmw.end: | |||
| 1459 | // [...] | |||
| 1460 | BasicBlock *ExitBB = | |||
| 1461 | BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); | |||
| 1462 | BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); | |||
| 1463 | ||||
| 1464 | // The split call above "helpfully" added a branch at the end of BB (to the | |||
| 1465 | // wrong place), but we want a load. It's easiest to just remove | |||
| 1466 | // the branch entirely. | |||
| 1467 | std::prev(BB->end())->eraseFromParent(); | |||
| 1468 | Builder.SetInsertPoint(BB); | |||
| 1469 | LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign); | |||
| 1470 | Builder.CreateBr(LoopBB); | |||
| 1471 | ||||
| 1472 | // Start the main loop block now that we've taken care of the preliminaries. | |||
| 1473 | Builder.SetInsertPoint(LoopBB); | |||
| 1474 | PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded"); | |||
| 1475 | Loaded->addIncoming(InitLoaded, BB); | |||
| 1476 | ||||
| 1477 | Value *NewVal = PerformOp(Builder, Loaded); | |||
| 1478 | ||||
| 1479 | Value *NewLoaded = nullptr; | |||
| 1480 | Value *Success = nullptr; | |||
| 1481 | ||||
| 1482 | CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign, | |||
| 1483 | MemOpOrder == AtomicOrdering::Unordered | |||
| 1484 | ? AtomicOrdering::Monotonic | |||
| 1485 | : MemOpOrder, | |||
| 1486 | SSID, Success, NewLoaded); | |||
| 1487 | assert(Success && NewLoaded); | |||
| 1488 | ||||
| 1489 | Loaded->addIncoming(NewLoaded, LoopBB); | |||
| 1490 | ||||
| 1491 | Builder.CreateCondBr(Success, ExitBB, LoopBB); | |||
| 1492 | ||||
| 1493 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |||
| 1494 | return NewLoaded; | |||
| 1495 | } | |||
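// ---- Editorial sketch (not part of the original source) --------------------
// A portable rendering of the expansion documented above: one plain load, then
// a loop of "compute the new value, try to CAS it in", reusing the value the
// failed CAS observed.  fetch_max_via_cas is a hypothetical helper; max is
// chosen because many targets have no single atomic max instruction.
#include <algorithm>
#include <atomic>

template <typename T>
T fetch_max_via_cas(std::atomic<T> &Addr, T Incr) {
  T Loaded = Addr.load();                                 // %init_loaded
  T NewVal;
  do {
    NewVal = std::max(Loaded, Incr);                      // %new = some_op ...
  } while (!Addr.compare_exchange_weak(Loaded, NewVal));  // %pair = cmpxchg ...
  return Loaded;  // the value that was in memory just before the exchange
}
// ---- End editorial sketch ---------------------------------------------------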
| 1496 | ||||
| 1497 | bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | |||
| 1498 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
| 1499 | unsigned ValueSize = getAtomicOpSize(CI); | |||
| 1500 | ||||
| 1501 | switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { | |||
| 1502 | default: | |||
| 1503 | llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg"); | |||
| 1504 | case TargetLoweringBase::AtomicExpansionKind::None: | |||
| 1505 | if (ValueSize < MinCASSize) | |||
| 1506 | return expandPartwordCmpXchg(CI); | |||
| 1507 | return false; | |||
| 1508 | case TargetLoweringBase::AtomicExpansionKind::LLSC: { | |||
| 1509 | return expandAtomicCmpXchg(CI); | |||
| 1510 | } | |||
| 1511 | case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: | |||
| 1512 | expandAtomicCmpXchgToMaskedIntrinsic(CI); | |||
| 1513 | return true; | |||
| 1514 | } | |||
| 1515 | } | |||
| 1516 | ||||
| 1517 | // Note: This function is exposed externally by AtomicExpandUtils.h | |||
| 1518 | bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, | |||
| 1519 | CreateCmpXchgInstFun CreateCmpXchg) { | |||
| 1520 | IRBuilder<> Builder(AI); | |||
| 1521 | Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( | |||
| 1522 | Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), | |||
| 1523 | AI->getOrdering(), AI->getSyncScopeID(), | |||
| 1524 | [&](IRBuilder<> &Builder, Value *Loaded) { | |||
| 1525 | return performAtomicOp(AI->getOperation(), Builder, Loaded, | |||
| 1526 | AI->getValOperand()); | |||
| 1527 | }, | |||
| 1528 | CreateCmpXchg); | |||
| 1529 | ||||
| 1530 | AI->replaceAllUsesWith(Loaded); | |||
| 1531 | AI->eraseFromParent(); | |||
| 1532 | return true; | |||
| 1533 | } | |||
| 1534 | ||||
| 1535 | // In order to use one of the sized library calls such as | |||
| 1536 | // __atomic_fetch_add_4, the alignment must be sufficient, the size | |||
| 1537 | // must be one of the potentially-specialized sizes, and the value | |||
| 1538 | // type must actually exist in C on the target (otherwise, the | |||
| 1539 | // function wouldn't actually be defined.) | |||
| 1540 | static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, | |||
| 1541 | const DataLayout &DL) { | |||
| 1542 | // TODO: "LargestSize" is an approximation for "largest type that | |||
| 1543 | // you can express in C". It seems to be the case that int128 is | |||
| 1544 | // supported on all 64-bit platforms, otherwise only up to 64-bit | |||
| 1545 | // integers are supported. If we get this wrong, then we'll try to | |||
| 1546 | // call a sized libcall that doesn't actually exist. There should | |||
| 1547 | // really be some more reliable way in LLVM of determining integer | |||
| 1548 | // sizes which are valid in the target's C ABI... | |||
| 1549 | unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8; | |||
| 1550 | return Alignment >= Size && | |||
| 1551 | (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) && | |||
| 1552 | Size <= LargestSize; | |||
| 1553 | } | |||
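// ---- Editorial sketch (not part of the original source) --------------------
// A constexpr restatement of the rule in canUseSizedAtomicCall(), with a few
// spot checks.  The LargestSize computation mimics the "int128 on 64-bit
// targets, otherwise 64-bit" approximation described in the TODO above; the
// helper name can_use_sized_call is hypothetical.
constexpr bool can_use_sized_call(unsigned Size, unsigned AlignmentBytes,
                                  unsigned LargestLegalIntBits) {
  const unsigned LargestSize = LargestLegalIntBits >= 64 ? 16 : 8;
  return AlignmentBytes >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
static_assert(can_use_sized_call(4, 4, 64), "aligned i32 -> __atomic_*_4");
static_assert(!can_use_sized_call(4, 2, 64), "under-aligned i32 -> generic call");
static_assert(!can_use_sized_call(16, 16, 32), "no i128 libcall on this target");
static_assert(!can_use_sized_call(3, 4, 64), "odd sizes only have the generic form");
// ---- End editorial sketch ---------------------------------------------------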
| 1554 | ||||
| 1555 | void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { | |||
| 1556 | static const RTLIB::Libcall Libcalls[6] = { | |||
| 1557 | RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, | |||
| 1558 | RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; | |||
| 1559 | unsigned Size = getAtomicOpSize(I); | |||
| 1560 | ||||
| 1561 | bool expanded = expandAtomicOpToLibcall( | |||
| 1562 | I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr, | |||
| 1563 | I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); | |||
| 1564 | if (!expanded) | |||
| 1565 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load"); | |||
| 1566 | } | |||
| 1567 | ||||
| 1568 | void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { | |||
| 1569 | static const RTLIB::Libcall Libcalls[6] = { | |||
| 1570 | RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, | |||
| 1571 | RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; | |||
| 1572 | unsigned Size = getAtomicOpSize(I); | |||
| 1573 | ||||
| 1574 | bool expanded = expandAtomicOpToLibcall( | |||
| 1575 | I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(), | |||
| 1576 | nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); | |||
| 1577 | if (!expanded) | |||
| 1578 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store"); | |||
| 1579 | } | |||
| 1580 | ||||
| 1581 | void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { | |||
| 1582 | static const RTLIB::Libcall Libcalls[6] = { | |||
| 1583 | RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1, | |||
| 1584 | RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, | |||
| 1585 | RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16}; | |||
| 1586 | unsigned Size = getAtomicOpSize(I); | |||
| 1587 | ||||
| 1588 | bool expanded = expandAtomicOpToLibcall( | |||
| 1589 | I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(), | |||
| 1590 | I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), | |||
| 1591 | Libcalls); | |||
| 1592 | if (!expanded) | |||
| 1593 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS"); | |||
| 1594 | } | |||
| 1595 | ||||
| 1596 | static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { | |||
| 1597 | static const RTLIB::Libcall LibcallsXchg[6] = { | |||
| 1598 | RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1, | |||
| 1599 | RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4, | |||
| 1600 | RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16}; | |||
| 1601 | static const RTLIB::Libcall LibcallsAdd[6] = { | |||
| 1602 | RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1, | |||
| 1603 | RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4, | |||
| 1604 | RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16}; | |||
| 1605 | static const RTLIB::Libcall LibcallsSub[6] = { | |||
| 1606 | RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1, | |||
| 1607 | RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4, | |||
| 1608 | RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16}; | |||
| 1609 | static const RTLIB::Libcall LibcallsAnd[6] = { | |||
| 1610 | RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1, | |||
| 1611 | RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4, | |||
| 1612 | RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16}; | |||
| 1613 | static const RTLIB::Libcall LibcallsOr[6] = { | |||
| 1614 | RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1, | |||
| 1615 | RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4, | |||
| 1616 | RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16}; | |||
| 1617 | static const RTLIB::Libcall LibcallsXor[6] = { | |||
| 1618 | RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1, | |||
| 1619 | RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4, | |||
| 1620 | RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16}; | |||
| 1621 | static const RTLIB::Libcall LibcallsNand[6] = { | |||
| 1622 | RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1, | |||
| 1623 | RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4, | |||
| 1624 | RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16}; | |||
| 1625 | ||||
| 1626 | switch (Op) { | |||
| 1627 | case AtomicRMWInst::BAD_BINOP: | |||
| 1628 | llvm_unreachable("Should not have BAD_BINOP."); | |||
| 1629 | case AtomicRMWInst::Xchg: | |||
| 1630 | return makeArrayRef(LibcallsXchg); | |||
| 1631 | case AtomicRMWInst::Add: | |||
| 1632 | return makeArrayRef(LibcallsAdd); | |||
| 1633 | case AtomicRMWInst::Sub: | |||
| 1634 | return makeArrayRef(LibcallsSub); | |||
| 1635 | case AtomicRMWInst::And: | |||
| 1636 | return makeArrayRef(LibcallsAnd); | |||
| 1637 | case AtomicRMWInst::Or: | |||
| 1638 | return makeArrayRef(LibcallsOr); | |||
| 1639 | case AtomicRMWInst::Xor: | |||
| 1640 | return makeArrayRef(LibcallsXor); | |||
| 1641 | case AtomicRMWInst::Nand: | |||
| 1642 | return makeArrayRef(LibcallsNand); | |||
| 1643 | case AtomicRMWInst::Max: | |||
| 1644 | case AtomicRMWInst::Min: | |||
| 1645 | case AtomicRMWInst::UMax: | |||
| 1646 | case AtomicRMWInst::UMin: | |||
| 1647 | case AtomicRMWInst::FAdd: | |||
| 1648 | case AtomicRMWInst::FSub: | |||
| 1649 | // No atomic libcalls are available for max/min/umax/umin or the FP operations. | |||
| 1650 | return {}; | |||
| 1651 | } | |||
| 1652 | llvm_unreachable("Unexpected AtomicRMW operation."); | |||
| 1653 | } | |||
| 1654 | ||||
| 1655 | void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { | |||
| 1656 | ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation()); | |||
| 1657 | ||||
| 1658 | unsigned Size = getAtomicOpSize(I); | |||
| 1659 | ||||
| 1660 | bool Success = false; | |||
| 1661 | if (!Libcalls.empty()) | |||
| 1662 | Success = expandAtomicOpToLibcall( | |||
| 1663 | I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(), | |||
| 1664 | nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); | |||
| 1665 | ||||
| 1666 | // The expansion failed: either there were no libcalls at all for | |||
| 1667 | // the operation (min/max), or there were only size-specialized | |||
| 1668 | // libcalls (add/sub/etc) and we needed a generic. So, expand to a | |||
| 1669 | // CAS libcall, via a CAS loop, instead. | |||
| 1670 | if (!Success) { | |||
| 1671 | expandAtomicRMWToCmpXchg( | |||
| 1672 | I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded, | |||
| 1673 | Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, | |||
| 1674 | SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { | |||
| 1675 | // Create the CAS instruction normally... | |||
| 1676 | AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( | |||
| 1677 | Addr, Loaded, NewVal, Alignment, MemOpOrder, | |||
| 1678 | AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); | |||
| 1679 | Success = Builder.CreateExtractValue(Pair, 1, "success"); | |||
| 1680 | NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); | |||
| 1681 | ||||
| 1682 | // ...and then expand the CAS into a libcall. | |||
| 1683 | expandAtomicCASToLibcall(Pair); | |||
| 1684 | }); | |||
| 1685 | } | |||
| 1686 | } | |||
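// ---- Editorial sketch (not part of the original source) --------------------
// What the libcall lowering above ultimately targets, expressed with the
// GCC/Clang __atomic builtins (an assumption: a GCC-compatible compiler).
// Depending on the target, these become inline instructions or calls into the
// sized __atomic_* runtime routines named in the RTLIB tables above.
#include <cstdint>

std::uint32_t demo_fetch_add(std::uint32_t *Counter) {
  return __atomic_fetch_add(Counter, 1u, __ATOMIC_SEQ_CST);  // returns old value
}

bool demo_compare_exchange(std::uint32_t *Slot, std::uint32_t *Expected,
                           std::uint32_t Desired) {
  return __atomic_compare_exchange_n(Slot, Expected, Desired,
                                     /*weak=*/false, __ATOMIC_SEQ_CST,
                                     __ATOMIC_RELAXED);
}
// ---- End editorial sketch ---------------------------------------------------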
| 1687 | ||||
| 1688 | // A helper routine for the above expandAtomic*ToLibcall functions. | |||
| 1689 | // | |||
| 1690 | // 'Libcalls' contains an array of enum values for the particular | |||
| 1691 | // ATOMIC libcalls to be emitted. All of the other arguments besides | |||
| 1692 | // 'I' are extracted from the Instruction subclass by the | |||
| 1693 | // caller. Depending on the particular call, some will be null. | |||
| 1694 | bool AtomicExpand::expandAtomicOpToLibcall( | |||
| 1695 | Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, | |||
| 1696 | Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, | |||
| 1697 | AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) { | |||
| 1698 | assert(Libcalls.size() == 6); | |||
| 1699 | ||||
| 1700 | LLVMContext &Ctx = I->getContext(); | |||
| 1701 | Module *M = I->getModule(); | |||
| 1702 | const DataLayout &DL = M->getDataLayout(); | |||
| 1703 | IRBuilder<> Builder(I); | |||
| 1704 | IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); | |||
| 1705 | ||||
| 1706 | bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); | |||
| 1707 | Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); | |||
| 1708 | ||||
| 1709 | const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); | |||
| 1710 | ||||
| 1711 | // TODO: the "order" argument type is "int", not int32. So | |||
| 1712 | // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. | |||
| 1713 | ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size); | |||
| 1714 | assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO"); | |||
| 1715 | Constant *OrderingVal = | |||
| 1716 | ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering)); | |||
| 1717 | Constant *Ordering2Val = nullptr; | |||
| 1718 | if (CASExpected) { | |||
| 1719 | assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO"); | |||
| 1720 | Ordering2Val = | |||
| 1721 | ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2)); | |||
| 1722 | } | |||
| 1723 | bool HasResult = I->getType() != Type::getVoidTy(Ctx); | |||
| 1724 | ||||
| 1725 | RTLIB::Libcall RTLibType; | |||
| 1726 | if (UseSizedLibcall) { | |||
| 1727 | switch (Size) { | |||
| 1728 | case 1: RTLibType = Libcalls[1]; break; | |||
| 1729 | case 2: RTLibType = Libcalls[2]; break; | |||
| 1730 | case 4: RTLibType = Libcalls[3]; break; | |||
| 1731 | case 8: RTLibType = Libcalls[4]; break; | |||
| 1732 | case 16: RTLibType = Libcalls[5]; break; | |||
| 1733 | } | |||
| 1734 | } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) { | |||
| 1735 | RTLibType = Libcalls[0]; | |||
| 1736 | } else { | |||
| 1737 | // Can't use sized function, and there's no generic for this | |||
| 1738 | // operation, so give up. | |||
| 1739 | return false; | |||
| 1740 | } | |||
| 1741 | ||||
| 1742 | if (!TLI->getLibcallName(RTLibType)) { | |||
| 1743 | // This target does not implement the requested atomic libcall so give up. | |||
| 1744 | return false; | |||
| 1745 | } | |||
| 1746 | ||||
| 1747 | // Build up the function call. There are two kinds. First, the sized | |||
| 1748 | // variants. These calls are going to be one of the following (with | |||
| 1749 | // N=1,2,4,8,16): | |||
| 1750 | // iN __atomic_load_N(iN *ptr, int ordering) | |||
| 1751 | // void __atomic_store_N(iN *ptr, iN val, int ordering) | |||
| 1752 | // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering) | |||
| 1753 | // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, | |||
| 1754 | // int success_order, int failure_order) | |||
| 1755 | // | |||
| 1756 | // Note that these functions can be used for non-integer atomic | |||
| 1757 | // operations; the values just need to be bitcast to integers on the | |||
| 1758 | // way in and out. | |||
| 1759 | // | |||
| 1760 | // And, then, the generic variants. They look like the following: | |||
| 1761 | // void __atomic_load(size_t size, void *ptr, void *ret, int ordering) | |||
| 1762 | // void __atomic_store(size_t size, void *ptr, void *val, int ordering) | |||
| 1763 | // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, | |||
| 1764 | // int ordering) | |||
| 1765 | // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, | |||
| 1766 | // void *desired, int success_order, | |||
| 1767 | // int failure_order) | |||
| 1768 | // | |||
| 1769 | // The different signatures are built up depending on the | |||
| 1770 | // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult' | |||
| 1771 | // variables. | |||
| 1772 | ||||
| 1773 | AllocaInst *AllocaCASExpected = nullptr; | |||
| 1774 | Value *AllocaCASExpected_i8 = nullptr; | |||
| 1775 | AllocaInst *AllocaValue = nullptr; | |||
| 1776 | Value *AllocaValue_i8 = nullptr; | |||
| 1777 | AllocaInst *AllocaResult = nullptr; | |||
| 1778 | Value *AllocaResult_i8 = nullptr; | |||
| 1779 | ||||
| 1780 | Type *ResultTy; | |||
| 1781 | SmallVector<Value *, 6> Args; | |||
| 1782 | AttributeList Attr; | |||
| 1783 | ||||
| 1784 | // 'size' argument. | |||
| 1785 | if (!UseSizedLibcall) { | |||
| 1786 | // Note, getIntPtrType is assumed equivalent to size_t. | |||
| 1787 | Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size)); | |||
| 1788 | } | |||
| 1789 | ||||
| 1790 | // 'ptr' argument. | |||
| 1791 | // Note: This assumes all address spaces share a common libfunc | |||
| 1792 | // implementation and that addresses are convertible. For systems without | |||
| 1793 | // that property, we'd need to extend this mechanism to support AS-specific | |||
| 1794 | // families of atomic intrinsics. | |||
| 1795 | auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); | |||
| 1796 | Value *PtrVal = Builder.CreateBitCast(PointerOperand, | |||
| 1797 | Type::getInt8PtrTy(Ctx, PtrTypeAS)); | |||
| 1798 | PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); | |||
| 1799 | Args.push_back(PtrVal); | |||
| 1800 | ||||
| 1801 | // 'expected' argument, if present. | |||
| 1802 | if (CASExpected) { | |||
| 1803 | AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); | |||
| 1804 | AllocaCASExpected->setAlignment(AllocaAlignment); | |||
| 1805 | unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); | |||
| 1806 | ||||
| 1807 | AllocaCASExpected_i8 = | |||
| 1808 | Builder.CreateBitCast(AllocaCASExpected, | |||
| 1809 | Type::getInt8PtrTy(Ctx, AllocaAS)); | |||
| 1810 | Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); | |||
| 1811 | Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); | |||
| 1812 | Args.push_back(AllocaCASExpected_i8); | |||
| 1813 | } | |||
| 1814 | ||||
| 1815 | // 'val' argument ('desired' for cas), if present. | |||
| 1816 | if (ValueOperand) { | |||
| 1817 | if (UseSizedLibcall) { | |||
| 1818 | Value *IntValue = | |||
| 1819 | Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy); | |||
| 1820 | Args.push_back(IntValue); | |||
| 1821 | } else { | |||
| 1822 | AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); | |||
| 1823 | AllocaValue->setAlignment(AllocaAlignment); | |||
| 1824 | AllocaValue_i8 = | |||
| 1825 | Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); | |||
| 1826 | Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); | |||
| 1827 | Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment); | |||
| 1828 | Args.push_back(AllocaValue_i8); | |||
| 1829 | } | |||
| 1830 | } | |||
| 1831 | ||||
| 1832 | // 'ret' argument. | |||
| 1833 | if (!CASExpected && HasResult && !UseSizedLibcall) { | |||
| 1834 | AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); | |||
| 1835 | AllocaResult->setAlignment(AllocaAlignment); | |||
| 1836 | unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); | |||
| 1837 | AllocaResult_i8 = | |||
| 1838 | Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); | |||
| 1839 | Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); | |||
| 1840 | Args.push_back(AllocaResult_i8); | |||
| 1841 | } | |||
| 1842 | ||||
| 1843 | // 'ordering' ('success_order' for cas) argument. | |||
| 1844 | Args.push_back(OrderingVal); | |||
| 1845 | ||||
| 1846 | // 'failure_order' argument, if present. | |||
| 1847 | if (Ordering2Val) | |||
| 1848 | Args.push_back(Ordering2Val); | |||
| 1849 | ||||
| 1850 | // Now, the return type. | |||
| 1851 | if (CASExpected) { | |||
| 1852 | ResultTy = Type::getInt1Ty(Ctx); | |||
| 1853 | Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt); | |||
| 1854 | } else if (HasResult && UseSizedLibcall) | |||
| 1855 | ResultTy = SizedIntTy; | |||
| 1856 | else | |||
| 1857 | ResultTy = Type::getVoidTy(Ctx); | |||
| 1858 | ||||
| 1859 | // Done with setting up arguments and return types, create the call: | |||
| 1860 | SmallVector<Type *, 6> ArgTys; | |||
| 1861 | for (Value *Arg : Args) | |||
| 1862 | ArgTys.push_back(Arg->getType()); | |||
| 1863 | FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); | |||
| 1864 | FunctionCallee LibcallFn = | |||
| 1865 | M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr); | |||
| 1866 | CallInst *Call = Builder.CreateCall(LibcallFn, Args); | |||
| 1867 | Call->setAttributes(Attr); | |||
| 1868 | Value *Result = Call; | |||
| 1869 | ||||
| 1870 | // And then, extract the results... | |||
| 1871 | if (ValueOperand && !UseSizedLibcall) | |||
| 1872 | Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64); | |||
| 1873 | ||||
| 1874 | if (CASExpected) { | |||
| 1875 | // The final result from the CAS is {load of 'expected' alloca, bool result | |||
| 1876 | // from call} | |||
| 1877 | Type *FinalResultTy = I->getType(); | |||
| 1878 | Value *V = UndefValue::get(FinalResultTy); | |||
| 1879 | Value *ExpectedOut = Builder.CreateAlignedLoad( | |||
| 1880 | CASExpected->getType(), AllocaCASExpected, AllocaAlignment); | |||
| 1881 | Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); | |||
| 1882 | V = Builder.CreateInsertValue(V, ExpectedOut, 0); | |||
| 1883 | V = Builder.CreateInsertValue(V, Result, 1); | |||
| 1884 | I->replaceAllUsesWith(V); | |||
| 1885 | } else if (HasResult) { | |||
| 1886 | Value *V; | |||
| 1887 | if (UseSizedLibcall) | |||
| 1888 | V = Builder.CreateBitOrPointerCast(Result, I->getType()); | |||
| 1889 | else { | |||
| 1890 | V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, | |||
| 1891 | AllocaAlignment); | |||
| 1892 | Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64); | |||
| 1893 | } | |||
| 1894 | I->replaceAllUsesWith(V); | |||
| 1895 | } | |||
| 1896 | I->eraseFromParent(); | |||
| 1897 | return true; | |||
| 1898 | } |
| 1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file contains types to represent alignments. | |||
| 10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
| 11 | // invalid manipulations. | |||
| 12 | // | |||
| 13 | // - Align represents an alignment in bytes. It is always set and always a valid | |||
| 14 | // power of two; its minimum value is 1, which means no alignment requirement. | |||
| 15 | // | |||
| 16 | // - MaybeAlign is an optional type: it may be undefined or set. When it is set, | |||
| 17 | // you can get the underlying Align value by using the getValue() method. | |||
| 18 | // | |||
| 19 | //===----------------------------------------------------------------------===// | |||
| 20 | ||||
| 21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 23 | ||||
| 24 | #include "llvm/ADT/Optional.h" | |||
| 25 | #include "llvm/Support/MathExtras.h" | |||
| 26 | #include <cassert> | |||
| 27 | #ifndef NDEBUG | |||
| 28 | #include <string> | |||
| 29 | #endif // NDEBUG | |||
| 30 | ||||
| 31 | namespace llvm { | |||
| 32 | ||||
| 33 | #define ALIGN_CHECK_ISPOSITIVE(decl) \ | |||
| 34 | assert(decl > 0 && (#decl " should be defined")) | |||
| 35 | ||||
| 36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
| 37 | /// alignment. | |||
| 38 | /// It is suitable for use as static global constants. | |||
| 39 | struct Align { | |||
| 40 | private: | |||
| 41 | uint8_t ShiftValue = 0; /// The log2 of the required alignment. | |||
| 42 | /// ShiftValue is less than 64 by construction. | |||
| 43 | ||||
| 44 | friend struct MaybeAlign; | |||
| 45 | friend unsigned Log2(Align); | |||
| 46 | friend bool operator==(Align Lhs, Align Rhs); | |||
| 47 | friend bool operator!=(Align Lhs, Align Rhs); | |||
| 48 | friend bool operator<=(Align Lhs, Align Rhs); | |||
| 49 | friend bool operator>=(Align Lhs, Align Rhs); | |||
| 50 | friend bool operator<(Align Lhs, Align Rhs); | |||
| 51 | friend bool operator>(Align Lhs, Align Rhs); | |||
| 52 | friend unsigned encode(struct MaybeAlign A); | |||
| 53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); | |||
| 54 | ||||
| 55 | /// A trivial type to allow construction of constexpr Align. | |||
| 56 | /// This is currently needed to work around a bug in GCC 5.3 which prevents | |||
| 57 | /// definition of constexpr assign operators. | |||
| 58 | /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic | |||
| 59 | /// FIXME: Remove this, make all assign operators constexpr and introduce user | |||
| 60 | /// defined literals when we don't have to support GCC 5.3 anymore. | |||
| 61 | /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain | |||
| 62 | struct LogValue { | |||
| 63 | uint8_t Log; | |||
| 64 | }; | |||
| 65 | ||||
| 66 | public: | |||
| 67 | /// Default is byte-aligned. | |||
| 68 | constexpr Align() = default; | |||
| 69 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
| 70 | /// checks have been performed when building `Other`. | |||
| 71 | constexpr Align(const Align &Other) = default; | |||
| 72 | constexpr Align(Align &&Other) = default; | |||
| 73 | Align &operator=(const Align &Other) = default; | |||
| 74 | Align &operator=(Align &&Other) = default; | |||
| 75 | ||||
| 76 | explicit Align(uint64_t Value) { | |||
| 77 | assert(Value > 0 && "Value must not be 0"); | |||
| 78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2"); | |||
| 79 | ShiftValue = Log2_64(Value); | |||
| 80 | assert(ShiftValue < 64 && "Broken invariant"); | |||
| 81 | } | |||
| 82 | ||||
| 83 | /// This is a hole in the type system and should not be abused. | |||
| 84 | /// Needed to interact with C for instance. | |||
| 85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } | |||
| ||||
| 86 | ||||
| 87 | /// Allow constructions of constexpr Align. | |||
| 88 | template <size_t kValue> constexpr static LogValue Constant() { | |||
| 89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; | |||
| 90 | } | |||
| 91 | ||||
| 92 | /// Allow constructions of constexpr Align from types. | |||
| 93 | /// Compile time equivalent to Align(alignof(T)). | |||
| 94 | template <typename T> constexpr static LogValue Of() { | |||
| 95 | return Constant<std::alignment_of<T>::value>(); | |||
| 96 | } | |||
| 97 | ||||
| 98 | /// Constexpr constructor from LogValue type. | |||
| 99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} | |||
| 100 | }; | |||
| 101 | ||||
| 102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
| 103 | inline Align assumeAligned(uint64_t Value) { | |||
| 104 | return Value ? Align(Value) : Align(); | |||
| 105 | } | |||
| 106 | ||||
| 107 | /// This struct is a compact representation of a valid (power of two) or | |||
| 108 | /// undefined (0) alignment. | |||
| 109 | struct MaybeAlign : public llvm::Optional<Align> { | |||
| 110 | private: | |||
| 111 | using UP = llvm::Optional<Align>; | |||
| 112 | ||||
| 113 | public: | |||
| 114 | /// Default is undefined. | |||
| 115 | MaybeAlign() = default; | |||
| 116 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
| 117 | /// checks have been performed when building `Other`. | |||
| 118 | MaybeAlign(const MaybeAlign &Other) = default; | |||
| 119 | MaybeAlign &operator=(const MaybeAlign &Other) = default; | |||
| 120 | MaybeAlign(MaybeAlign &&Other) = default; | |||
| 121 | MaybeAlign &operator=(MaybeAlign &&Other) = default; | |||
| 122 | ||||
| 123 | /// Use llvm::Optional<Align> constructor. | |||
| 124 | using UP::UP; | |||
| 125 | ||||
| 126 | explicit MaybeAlign(uint64_t Value) { | |||
| 127 | assert((Value == 0 || llvm::isPowerOf2_64(Value)) && | |||
| 128 | "Alignment is neither 0 nor a power of 2"); | |||
| 129 | if (Value) | |||
| 130 | emplace(Value); | |||
| 131 | } | |||
| 132 | ||||
| 133 | /// For convenience, returns a valid alignment or 1 if undefined. | |||
| 134 | Align valueOrOne() const { return hasValue() ? getValue() : Align(); } | |||
| 135 | }; | |||
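// ---- Editorial sketch (not part of the original source) --------------------
// A short usage sketch of the two types documented above, assuming the
// declarations in this header are in scope (client code would include
// llvm/Support/Alignment.h).  align_usage_sketch is a hypothetical name.
inline void align_usage_sketch() {
  Align A(16);                               // must be a non-zero power of two
  assert(A.value() == 16);
  constexpr Align B = Align::Constant<8>();  // constexpr construction via LogValue

  MaybeAlign Unknown;                        // "no alignment information"
  MaybeAlign Known(32);                      // MaybeAlign(0) would also mean "unknown"
  assert(Unknown.valueOrOne() == Align(1));
  assert(Known.valueOrOne() == Align(32));
  (void)A;
  (void)B;
}
// ---- End editorial sketch ---------------------------------------------------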
| 136 | ||||
| 137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
| 138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
| 139 | return SizeInBytes % Lhs.value() == 0; | |||
| 140 | } | |||
| 141 | ||||
| 142 | /// Checks that Addr is a multiple of the alignment. | |||
| 143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
| 144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
| 145 | } | |||
| 146 | ||||
| 147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
| 149 | const uint64_t Value = A.value(); | |||
| 150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
| 151 | ||||
| 152 | // The division followed by a multiplication can be thought of as a right | |||
| 153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
| 154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
| 155 | // are just zero. | |||
| 156 | ||||
| 157 | // Most compilers can generate this code but the pattern may be missed when | |||
| 158 | // multiple functions get inlined. | |||
| 159 | return (Size + Value - 1) & ~(Value - 1U); | |||
| 160 | } | |||
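// ---- Editorial sketch (not part of the original source) --------------------
// A standalone mirror of the formula used above, plus a few worked values:
// rounding Size up to a multiple of a power-of-two Value is
// (Size + Value - 1) & ~(Value - 1).  align_up is a hypothetical name kept
// separate from alignTo so the arithmetic can be checked in isolation.
#include <cstdint>

constexpr std::uint64_t align_up(std::uint64_t Size, std::uint64_t Value) {
  return (Size + Value - 1) & ~(Value - 1);
}
static_assert(align_up(0, 8) == 0, "0 is already aligned");
static_assert(align_up(5, 8) == 8, "5 rounds up to the next multiple of 8");
static_assert(align_up(16, 8) == 16, "multiples are unchanged");
static_assert(align_up(17, 8) == 24, "17 rounds up to 24");
// ---- End editorial sketch ---------------------------------------------------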
| 161 | ||||
| 162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
| 163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
| 164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
| 165 | /// Skew mod \p A'. | |||
| 166 | /// | |||
| 167 | /// Examples: | |||
| 168 | /// \code | |||
| 169 | /// alignTo(5, Align(8), 7) = 7 | |||
| 170 | /// alignTo(17, Align(8), 1) = 17 | |||
| 171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
| 172 | /// \endcode | |||
| 173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
| 174 | const uint64_t Value = A.value(); | |||
| 175 | Skew %= Value; | |||
| 176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
| 177 | } | |||
| 178 | ||||
| 179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 180 | /// Returns `Size` if current alignment is undefined. | |||
| 181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
| 182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
| 183 | } | |||
| 184 | ||||
| 185 | /// Aligns `Addr` to `Alignment` bytes, rounding up. | |||
| 186 | inline uintptr_t alignAddr(const void *Addr, Align Alignment) { | |||
| 187 | uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr); | |||
| 188 | assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >= | |||
| 189 | ArithAddr && | |||
| 190 | "Overflow"); | |||
| 191 | return alignTo(ArithAddr, Alignment); | |||
| 192 | } | |||
| 193 | ||||
| 194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
| 195 | /// or equal to \p Value and is a multiple of \p Align. | |||
| 196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
| 197 | return alignTo(Value, Alignment) - Value; | |||
| 198 | } | |||
| 199 | ||||
| 200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
| 201 | /// bytes, rounding up. | |||
| 202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
| 203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
| 204 | } | |||
| 205 | ||||
| 206 | /// Returns the log2 of the alignment. | |||
| 207 | inline unsigned Log2(Align A) { return A.ShiftValue; } | |||
| 208 | ||||
| 209 | /// Returns the alignment that satisfies both alignments. | |||
| 210 | /// Same semantic as MinAlign. | |||
| 211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
| 212 | ||||
| 213 | /// Returns the alignment that satisfies both alignments. | |||
| 214 | /// Same semantic as MinAlign. | |||
| 215 | inline Align commonAlignment(Align A, uint64_t Offset) { | |||
| 216 | return Align(MinAlign(A.value(), Offset)); | |||
| 217 | } | |||
| 218 | ||||
| 219 | /// Returns the alignment that satisfies both alignments. | |||
| 220 | /// Same semantic as MinAlign. | |||
| 221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
| 222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
| 223 | } | |||
| 224 | ||||
| 225 | /// Returns the alignment that satisfies both alignments. | |||
| 226 | /// Same semantic as MinAlign. | |||
| 227 | inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { | |||
| 228 | return MaybeAlign(MinAlign((*A).value(), Offset)); | |||
| 229 | } | |||
| 230 | ||||
| 231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
| 232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
| 233 | ||||
| 234 | /// Dual operation of the encode function above. | |||
| 235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
| 236 | if (Value == 0) | |||
| 237 | return MaybeAlign(); | |||
| 238 | Align Out; | |||
| 239 | Out.ShiftValue = Value - 1; | |||
| 240 | return Out; | |||
| 241 | } | |||
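// ---- Editorial sketch (not part of the original source) --------------------
// Spot checks of the encoding defined just above: "undefined" maps to 0,
// Align(1) (ShiftValue 0) maps to 1, and decodeMaybeAlign() inverts encode().
// encode_roundtrip_sketch is a hypothetical name; the declarations above are
// assumed to be in scope.
inline void encode_roundtrip_sketch() {
  assert(encode(MaybeAlign()) == 0);
  assert(encode(MaybeAlign(Align(1))) == 1);
  const MaybeAlign A(64);
  assert(*decodeMaybeAlign(encode(A)) == *A);
}
// ---- End editorial sketch ---------------------------------------------------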
| 242 | ||||
| 243 | /// Returns a representation of the alignment; the encoded value is positive by | |||
| 244 | /// definition. | |||
| 245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } | |||
| 246 | ||||
| 247 | /// Comparisons between Align and scalars. Rhs must be positive. | |||
| 248 | inline bool operator==(Align Lhs, uint64_t Rhs) { | |||
| 249 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 250 | return Lhs.value() == Rhs; | |||
| 251 | } | |||
| 252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { | |||
| 253 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 254 | return Lhs.value() != Rhs; | |||
| 255 | } | |||
| 256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { | |||
| 257 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 258 | return Lhs.value() <= Rhs; | |||
| 259 | } | |||
| 260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { | |||
| 261 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 262 | return Lhs.value() >= Rhs; | |||
| 263 | } | |||
| 264 | inline bool operator<(Align Lhs, uint64_t Rhs) { | |||
| 265 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 266 | return Lhs.value() < Rhs; | |||
| 267 | } | |||
| 268 | inline bool operator>(Align Lhs, uint64_t Rhs) { | |||
| 269 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 270 | return Lhs.value() > Rhs; | |||
| 271 | } | |||
| 272 | ||||
| 273 | /// Comparisons between MaybeAlign and scalars. | |||
| 274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
| 276 | } | |||
| 277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
| 279 | } | |||
| 280 | ||||
| 281 | /// Comparison operators between Align. | |||
| 282 | inline bool operator==(Align Lhs, Align Rhs) { | |||
| 283 | return Lhs.ShiftValue == Rhs.ShiftValue; | |||
| 284 | } | |||
| 285 | inline bool operator!=(Align Lhs, Align Rhs) { | |||
| 286 | return Lhs.ShiftValue != Rhs.ShiftValue; | |||
| 287 | } | |||
| 288 | inline bool operator<=(Align Lhs, Align Rhs) { | |||
| 289 | return Lhs.ShiftValue <= Rhs.ShiftValue; | |||
| 290 | } | |||
| 291 | inline bool operator>=(Align Lhs, Align Rhs) { | |||
| 292 | return Lhs.ShiftValue >= Rhs.ShiftValue; | |||
| 293 | } | |||
| 294 | inline bool operator<(Align Lhs, Align Rhs) { | |||
| 295 | return Lhs.ShiftValue < Rhs.ShiftValue; | |||
| 296 | } | |||
| 297 | inline bool operator>(Align Lhs, Align Rhs) { | |||
| 298 | return Lhs.ShiftValue > Rhs.ShiftValue; | |||
| 299 | } | |||
| 300 | ||||
| 301 | // Don't allow relational comparisons with MaybeAlign. | |||
| 302 | bool operator<=(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 303 | bool operator>=(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 304 | bool operator<(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 305 | bool operator>(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 306 | ||||
| 307 | bool operator<=(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 308 | bool operator>=(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 309 | bool operator<(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 310 | bool operator>(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 311 | ||||
| 312 | bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 313 | bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 314 | bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 315 | bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 316 | ||||
| 317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
| 318 | assert(Rhs > 0 && "Rhs must be positive"); | |||
| 319 | return Align(Lhs.value() * Rhs); | |||
| 320 | } | |||
| 321 | ||||
| 322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 323 | assert(Rhs > 0 && "Rhs must be positive"); | |||
| 324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
| 325 | } | |||
| 326 | ||||
| 327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
| 328 | assert(llvm::isPowerOf2_64(Divisor) && | |||
| 329 | "Divisor must be positive and a power of 2"); | |||
| 330 | assert(Lhs != 1 && "Can't halve byte alignment"); | |||
| 331 | return Align(Lhs.value() / Divisor); | |||
| 332 | } | |||
| 333 | ||||
| 334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
| 335 | assert(llvm::isPowerOf2_64(Divisor) && | |||
| 336 | "Divisor must be positive and a power of 2"); | |||
| 337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
| 338 | } | |||
| 339 | ||||
| 340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
| 341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
| 342 | } | |||
| 343 | ||||
| 344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
| 345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
| 346 | } | |||
| 347 | ||||
| 348 | #ifndef NDEBUG | |||
| 349 | // For usage in LLVM_DEBUG macros. | |||
| 350 | inline std::string DebugStr(const Align &A) { | |||
| 351 | return std::to_string(A.value()); | |||
| 352 | } | |||
| 353 | // For usage in LLVM_DEBUG macros. | |||
| 354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
| 355 | if (MA) | |||
| 356 | return std::to_string(MA->value()); | |||
| 357 | return "None"; | |||
| 358 | } | |||
| 359 | #endif // NDEBUG | |||
| 360 | ||||
| 361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
| 362 | ||||
| 363 | } // namespace llvm | |||
| 364 | ||||
| 365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |