| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h |
| Warning: | line 85, column 47 The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | //===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-=// | ||||
| 2 | // | ||||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
| 6 | // | ||||
| 7 | //===----------------------------------------------------------------------===// | ||||
| 8 | // | ||||
| 9 | // This pass eliminates LDS uses from non-kernel functions. | ||||
| 10 | // | ||||
| 11 | // The strategy is to create a new struct with a field for each LDS variable | ||||
| 12 | // and allocate that struct at the same address for every kernel. Uses of the | ||||
| 13 | // original LDS variables are then replaced with compile time offsets from that | ||||
| 14 | // known address. AMDGPUMachineFunction allocates the LDS global. | ||||
| 15 | // | ||||
| 16 | // Local variables with constant annotation or non-undef initializer are passed | ||||
| 17 | // through unchanged for simplification or error diagnostics in later passes. | ||||
| 18 | // | ||||
| 19 | // To reduce the memory overhead variables that are only used by kernels are | ||||
| 20 | // excluded from this transform. The analysis to determine whether a variable | ||||
| 21 | // is only used by a kernel is cheap and conservative so this may allocate | ||||
| 22 | // a variable in every kernel when it was not strictly necessary to do so. | ||||
| 23 | // | ||||
| 24 | // A possible future refinement is to specialise the structure per-kernel, so | ||||
| 25 | // that fields can be elided based on more expensive analysis. | ||||
| 26 | // | ||||
| 27 | // NOTE: Since this pass will directly pack LDS (assume large LDS) into a struct | ||||
| 28 | // type which would cause allocating huge memory for struct instance within | ||||
| 29 | // every kernel. Hence, before running this pass, it is advisable to run the | ||||
| 30 | // pass "amdgpu-replace-lds-use-with-pointer" which will replace LDS uses within | ||||
| 31 | // non-kernel functions by pointers and thereby minimizes the unnecessary per | ||||
| 32 | // kernel allocation of LDS memory. | ||||
| 33 | // | ||||
| 34 | //===----------------------------------------------------------------------===// | ||||
| 35 | |||||
| 36 | #include "AMDGPU.h" | ||||
| 37 | #include "Utils/AMDGPUBaseInfo.h" | ||||
| 38 | #include "Utils/AMDGPULDSUtils.h" | ||||
| 39 | #include "llvm/ADT/STLExtras.h" | ||||
| 40 | #include "llvm/IR/Constants.h" | ||||
| 41 | #include "llvm/IR/DerivedTypes.h" | ||||
| 42 | #include "llvm/IR/IRBuilder.h" | ||||
| 43 | #include "llvm/IR/InlineAsm.h" | ||||
| 44 | #include "llvm/IR/Instructions.h" | ||||
| 45 | #include "llvm/InitializePasses.h" | ||||
| 46 | #include "llvm/Pass.h" | ||||
| 47 | #include "llvm/Support/CommandLine.h" | ||||
| 48 | #include "llvm/Support/Debug.h" | ||||
| 49 | #include "llvm/Support/OptimizedStructLayout.h" | ||||
| 50 | #include "llvm/Transforms/Utils/ModuleUtils.h" | ||||
| 51 | #include <vector> | ||||
| 52 | |||||
// Fix extraction garbling: the define was fused with its own macro expansion
// ("#define DEBUG_TYPE"..." "..."), which is not valid source.
#define DEBUG_TYPE "amdgpu-lower-module-lds"
| 54 | |||||
| 55 | using namespace llvm; | ||||
| 56 | |||||
// Command-line escape hatch (default on, hidden): when set, processUsedLDS
// bumps each LDS global's alignment up to the next power-of-two bucket
// (2/4/8/16 bytes by size) so wider aligned LDS loads/stores can be used.
static cl::opt<bool> SuperAlignLDSGlobals(
    "amdgpu-super-align-lds-globals",
    cl::desc("Increase alignment of LDS if it is not on align boundary"),
    cl::init(true), cl::Hidden);
| 61 | |||||
| 62 | namespace { | ||||
| 63 | |||||
| 64 | class AMDGPULowerModuleLDS : public ModulePass { | ||||
| 65 | |||||
  // Rebuild the appending-linkage list global named \p Name (llvm.used or
  // llvm.compiler.used), dropping every entry whose stripped pointer is in
  // \p ToRemove. If no entry matches, the global is left untouched; if all
  // entries are removed, the global is deleted and not recreated.
  static void removeFromUsedList(Module &M, StringRef Name,
                                 SmallPtrSetImpl<Constant *> &ToRemove) {
    GlobalVariable *GV = M.getNamedGlobal(Name);
    if (!GV || ToRemove.empty()) {
      return;
    }

    // Collect the initializer entries that survive the filter.
    SmallVector<Constant *, 16> Init;
    auto *CA = cast<ConstantArray>(GV->getInitializer());
    for (auto &Op : CA->operands()) {
      // ModuleUtils::appendToUsed only inserts Constants
      Constant *C = cast<Constant>(Op);
      if (!ToRemove.contains(C->stripPointerCasts())) {
        Init.push_back(C);
      }
    }

    if (Init.size() == CA->getNumOperands()) {
      return; // none to remove
    }

    // The initializer changed, so the old global must be replaced wholesale.
    GV->eraseFromParent();

    // Drop now-dead constant expressions that still reference removed entries.
    for (Constant *C : ToRemove) {
      C->removeDeadConstantUsers();
    }

    if (!Init.empty()) {
      ArrayType *ATy =
          ArrayType::get(Type::getInt8PtrTy(M.getContext()), Init.size());
      GV =
          new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
                                   ConstantArray::get(ATy, Init), Name);
      // llvm.used/llvm.compiler.used live in the llvm.metadata section.
      GV->setSection("llvm.metadata");
    }
  }
| 102 | |||||
| 103 | static void | ||||
| 104 | removeFromUsedLists(Module &M, | ||||
| 105 | const std::vector<GlobalVariable *> &LocalVars) { | ||||
| 106 | SmallPtrSet<Constant *, 32> LocalVarsSet; | ||||
| 107 | for (size_t I = 0; I < LocalVars.size(); I++) { | ||||
| 108 | if (Constant *C = dyn_cast<Constant>(LocalVars[I]->stripPointerCasts())) { | ||||
| 109 | LocalVarsSet.insert(C); | ||||
| 110 | } | ||||
| 111 | } | ||||
| 112 | removeFromUsedList(M, "llvm.used", LocalVarsSet); | ||||
| 113 | removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet); | ||||
| 114 | } | ||||
| 115 | |||||
  // Insert, at the top of \p Func's entry block, a call to llvm.donothing
  // carrying an "ExplicitUse" operand bundle that points at \p SGV. This
  // materializes the kernel's implicit dependence on the module LDS struct.
  static void markUsedByKernel(IRBuilder<> &Builder, Function *Func,
                               GlobalVariable *SGV) {
    // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
    // that might call a function which accesses a field within it. This is
    // presently approximated to 'all kernels' if there are any such functions
    // in the module. This implicit use is reified as an explicit use here so
    // that later passes, specifically PromoteAlloca, account for the required
    // memory without any knowledge of this transform.

    // An operand bundle on llvm.donothing works because the call instruction
    // survives until after the last pass that needs to account for LDS. It is
    // better than inline asm as the latter survives until the end of codegen. A
    // totally robust solution would be a function with the same semantics as
    // llvm.donothing that takes a pointer to the instance and is lowered to a
    // no-op after LDS is allocated, but that is not presently necessary.

    LLVMContext &Ctx = Func->getContext();

    Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI());

    FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), {});

    Function *Decl =
        Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});

    // GEP with a zero index yields a pointer to the struct itself; the bundle
    // operand keeps SGV alive and visible to later analyses.
    Value *UseInstance[1] = {Builder.CreateInBoundsGEP(
        SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))};

    Builder.CreateCall(FTy, Decl, {},
                       {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)},
                       "");
  }
| 148 | |||||
private:
  // Snapshot of the globals referenced by llvm.used/llvm.compiler.used,
  // populated at the start of runOnModule and cleared at the end.
  SmallPtrSet<GlobalValue *, 32> UsedList;

public:
  static char ID;

  AMDGPULowerModuleLDS() : ModulePass(ID) {
    initializeAMDGPULowerModuleLDSPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    UsedList = AMDGPU::getUsedList(M);

    // First lower the variables reachable from non-kernel functions into the
    // module-scope struct (F == nullptr form).
    bool Changed = processUsedLDS(M);

    // Then lower the remaining variables kernel by kernel.
    for (Function &F : M.functions()) {
      // Only lower compute kernels' LDS.
      if (!AMDGPU::isKernel(F.getCallingConv()))
        continue;
      Changed |= processUsedLDS(M, &F);
    }

    UsedList.clear();
    return Changed;
  }
| 174 | |||||
| 175 | private: | ||||
| 176 | bool processUsedLDS(Module &M, Function *F = nullptr) { | ||||
| 177 | LLVMContext &Ctx = M.getContext(); | ||||
| 178 | const DataLayout &DL = M.getDataLayout(); | ||||
| 179 | |||||
| 180 | // Find variables to move into new struct instance | ||||
| 181 | std::vector<GlobalVariable *> FoundLocalVars = | ||||
| 182 | AMDGPU::findVariablesToLower(M, F); | ||||
| 183 | |||||
| 184 | if (FoundLocalVars.empty()) { | ||||
| 185 | // No variables to rewrite, no changes made. | ||||
| 186 | return false; | ||||
| 187 | } | ||||
| 188 | |||||
| 189 | // Increase the alignment of LDS globals if necessary to maximise the chance | ||||
| 190 | // that we can use aligned LDS instructions to access them. | ||||
| 191 | if (SuperAlignLDSGlobals) { | ||||
| 192 | for (auto *GV : FoundLocalVars) { | ||||
| 193 | Align Alignment = AMDGPU::getAlign(DL, GV); | ||||
| 194 | TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType()); | ||||
| 195 | |||||
| 196 | if (GVSize > 8) { | ||||
| 197 | // We might want to use a b96 or b128 load/store | ||||
| 198 | Alignment = std::max(Alignment, Align(16)); | ||||
| 199 | } else if (GVSize > 4) { | ||||
| 200 | // We might want to use a b64 load/store | ||||
| 201 | Alignment = std::max(Alignment, Align(8)); | ||||
| 202 | } else if (GVSize > 2) { | ||||
| 203 | // We might want to use a b32 load/store | ||||
| 204 | Alignment = std::max(Alignment, Align(4)); | ||||
| 205 | } else if (GVSize > 1) { | ||||
| 206 | // We might want to use a b16 load/store | ||||
| 207 | Alignment = std::max(Alignment, Align(2)); | ||||
| 208 | } | ||||
| 209 | |||||
| 210 | GV->setAlignment(Alignment); | ||||
| 211 | } | ||||
| 212 | } | ||||
| 213 | |||||
| 214 | SmallVector<OptimizedStructLayoutField, 8> LayoutFields; | ||||
| 215 | LayoutFields.reserve(FoundLocalVars.size()); | ||||
| 216 | for (GlobalVariable *GV : FoundLocalVars) { | ||||
| 217 | OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()), | ||||
| 218 | AMDGPU::getAlign(DL, GV)); | ||||
| 219 | LayoutFields.emplace_back(F); | ||||
| 220 | } | ||||
| 221 | |||||
| 222 | performOptimizedStructLayout(LayoutFields); | ||||
| 223 | |||||
| 224 | std::vector<GlobalVariable *> LocalVars; | ||||
| 225 | LocalVars.reserve(FoundLocalVars.size()); // will be at least this large | ||||
| 226 | { | ||||
| 227 | // This usually won't need to insert any padding, perhaps avoid the alloc | ||||
| 228 | uint64_t CurrentOffset = 0; | ||||
| 229 | for (size_t I = 0; I < LayoutFields.size(); I++) { | ||||
| 230 | GlobalVariable *FGV = static_cast<GlobalVariable *>( | ||||
| 231 | const_cast<void *>(LayoutFields[I].Id)); | ||||
| 232 | Align DataAlign = LayoutFields[I].Alignment; | ||||
| 233 | |||||
| 234 | uint64_t DataAlignV = DataAlign.value(); | ||||
| 235 | if (uint64_t Rem = CurrentOffset % DataAlignV) { | ||||
| 236 | uint64_t Padding = DataAlignV - Rem; | ||||
| 237 | |||||
| 238 | // Append an array of padding bytes to meet alignment requested | ||||
| 239 | // Note (o + (a - (o % a)) ) % a == 0 | ||||
| 240 | // (offset + Padding ) % align == 0 | ||||
| 241 | |||||
| 242 | Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding); | ||||
| 243 | LocalVars.push_back(new GlobalVariable( | ||||
| 244 | M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy), | ||||
| 245 | "", nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, | ||||
| 246 | false)); | ||||
| 247 | CurrentOffset += Padding; | ||||
| 248 | } | ||||
| 249 | |||||
| 250 | LocalVars.push_back(FGV); | ||||
| 251 | CurrentOffset += LayoutFields[I].Size; | ||||
| 252 | } | ||||
| 253 | } | ||||
| 254 | |||||
| 255 | std::vector<Type *> LocalVarTypes; | ||||
| 256 | LocalVarTypes.reserve(LocalVars.size()); | ||||
| 257 | std::transform( | ||||
| 258 | LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes), | ||||
| 259 | [](const GlobalVariable *V) -> Type * { return V->getValueType(); }); | ||||
| 260 | |||||
| 261 | std::string VarName( | ||||
| 262 | F
| ||||
| 263 | : "llvm.amdgcn.module.lds"); | ||||
| 264 | StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t"); | ||||
| 265 | |||||
| 266 | Align StructAlign = | ||||
| 267 | AMDGPU::getAlign(DL, LocalVars[0]); | ||||
| 268 | |||||
| 269 | GlobalVariable *SGV = new GlobalVariable( | ||||
| 270 | M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy), | ||||
| 271 | VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, | ||||
| 272 | false); | ||||
| 273 | SGV->setAlignment(StructAlign); | ||||
| 274 | if (!F
| ||||
| 275 | appendToCompilerUsed( | ||||
| 276 | M, {static_cast<GlobalValue *>( | ||||
| 277 | ConstantExpr::getPointerBitCastOrAddrSpaceCast( | ||||
| 278 | cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))}); | ||||
| 279 | } | ||||
| 280 | |||||
| 281 | // The verifier rejects used lists containing an inttoptr of a constant | ||||
| 282 | // so remove the variables from these lists before replaceAllUsesWith | ||||
| 283 | removeFromUsedLists(M, LocalVars); | ||||
| 284 | |||||
| 285 | // Replace uses of ith variable with a constantexpr to the ith field of the | ||||
| 286 | // instance that will be allocated by AMDGPUMachineFunction | ||||
| 287 | Type *I32 = Type::getInt32Ty(Ctx); | ||||
| 288 | for (size_t I = 0; I < LocalVars.size(); I++) { | ||||
| 289 | GlobalVariable *GV = LocalVars[I]; | ||||
| 290 | Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)}; | ||||
| 291 | Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx); | ||||
| 292 | if (F
| ||||
| 293 | // Replace all constant uses with instructions if they belong to the | ||||
| 294 | // current kernel. | ||||
| 295 | for (User *U : make_early_inc_range(GV->users())) { | ||||
| 296 | if (ConstantExpr *C = dyn_cast<ConstantExpr>(U)) | ||||
| 297 | AMDGPU::replaceConstantUsesInFunction(C, F); | ||||
| 298 | } | ||||
| 299 | |||||
| 300 | GV->removeDeadConstantUsers(); | ||||
| 301 | |||||
| 302 | GV->replaceUsesWithIf(GEP, [F](Use &U) { | ||||
| 303 | Instruction *I = dyn_cast<Instruction>(U.getUser()); | ||||
| 304 | return I && I->getFunction() == F; | ||||
| 305 | }); | ||||
| 306 | } else { | ||||
| 307 | GV->replaceAllUsesWith(GEP); | ||||
| 308 | } | ||||
| 309 | if (GV->use_empty()) { | ||||
| 310 | UsedList.erase(GV); | ||||
| 311 | GV->eraseFromParent(); | ||||
| 312 | } | ||||
| 313 | |||||
| 314 | uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I); | ||||
| 315 | Align A = commonAlignment(StructAlign, Off); | ||||
| 316 | refineUsesAlignment(GEP, A, DL); | ||||
| 317 | } | ||||
| 318 | |||||
| 319 | // Mark kernels with asm that reads the address of the allocated structure | ||||
| 320 | // This is not necessary for lowering. This lets other passes, specifically | ||||
| 321 | // PromoteAlloca, accurately calculate how much LDS will be used by the | ||||
| 322 | // kernel after lowering. | ||||
| 323 | if (!F) { | ||||
| 324 | IRBuilder<> Builder(Ctx); | ||||
| 325 | SmallPtrSet<Function *, 32> Kernels; | ||||
| 326 | for (auto &I : M.functions()) { | ||||
| 327 | Function *Func = &I; | ||||
| 328 | if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) { | ||||
| 329 | markUsedByKernel(Builder, Func, SGV); | ||||
| 330 | Kernels.insert(Func); | ||||
| 331 | } | ||||
| 332 | } | ||||
| 333 | } | ||||
| 334 | return true; | ||||
| 335 | } | ||||
| 336 | |||||
| 337 | void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL, | ||||
| 338 | unsigned MaxDepth = 5) { | ||||
| 339 | if (!MaxDepth
| ||||
| 340 | return; | ||||
| 341 | |||||
| 342 | for (User *U : Ptr->users()) { | ||||
| 343 | if (auto *LI = dyn_cast<LoadInst>(U)) { | ||||
| 344 | LI->setAlignment(std::max(A, LI->getAlign())); | ||||
| 345 | continue; | ||||
| 346 | } | ||||
| 347 | if (auto *SI = dyn_cast<StoreInst>(U)) { | ||||
| 348 | if (SI->getPointerOperand() == Ptr) | ||||
| 349 | SI->setAlignment(std::max(A, SI->getAlign())); | ||||
| 350 | continue; | ||||
| 351 | } | ||||
| 352 | if (auto *AI = dyn_cast<AtomicRMWInst>(U)) { | ||||
| 353 | // None of atomicrmw operations can work on pointers, but let's | ||||
| 354 | // check it anyway in case it will or we will process ConstantExpr. | ||||
| 355 | if (AI->getPointerOperand() == Ptr) | ||||
| 356 | AI->setAlignment(std::max(A, AI->getAlign())); | ||||
| 357 | continue; | ||||
| 358 | } | ||||
| 359 | if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) { | ||||
| 360 | if (AI->getPointerOperand() == Ptr) | ||||
| 361 | AI->setAlignment(std::max(A, AI->getAlign())); | ||||
| 362 | continue; | ||||
| 363 | } | ||||
| 364 | if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { | ||||
| 365 | unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); | ||||
| 366 | APInt Off(BitWidth, 0); | ||||
| 367 | if (GEP->getPointerOperand() == Ptr && | ||||
| 368 | GEP->accumulateConstantOffset(DL, Off)) { | ||||
| 369 | Align GA = commonAlignment(A, Off.getLimitedValue()); | ||||
| 370 | refineUsesAlignment(GEP, GA, DL, MaxDepth - 1); | ||||
| 371 | } | ||||
| 372 | continue; | ||||
| 373 | } | ||||
| 374 | if (auto *I = dyn_cast<Instruction>(U)) { | ||||
| 375 | if (I->getOpcode() == Instruction::BitCast || | ||||
| 376 | I->getOpcode() == Instruction::AddrSpaceCast) | ||||
| 377 | refineUsesAlignment(I, A, DL, MaxDepth - 1); | ||||
| 378 | } | ||||
| 379 | } | ||||
| 380 | } | ||||
| 381 | }; | ||||
| 382 | |||||
| 383 | } // namespace | ||||
| 384 | char AMDGPULowerModuleLDS::ID = 0; | ||||
| 385 | |||||
| 386 | char &llvm::AMDGPULowerModuleLDSID = AMDGPULowerModuleLDS::ID; | ||||
| 387 | |||||
| 388 | INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions" , "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce , std::ref(Registry)); } | ||||
| 389 | "Lower uses of LDS variables from non-kernel functions", false,static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions" , "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce , std::ref(Registry)); } | ||||
| 390 | false)static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions" , "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce , std::ref(Registry)); } | ||||
| 391 | |||||
// Factory used by the legacy pass manager pipeline; caller owns the result.
ModulePass *llvm::createAMDGPULowerModuleLDSPass() {
  return new AMDGPULowerModuleLDS();
}
| 395 | |||||
| 396 | PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M, | ||||
| 397 | ModuleAnalysisManager &) { | ||||
| 398 | return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none() | ||||
| |||||
| 399 | : PreservedAnalyses::all(); | ||||
| 400 | } |
| 1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file contains types to represent alignments. | |||
| 10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
| 11 | // invalid manipulations. | |||
| 12 | // | |||
| 13 | // - Align represents an alignment in bytes, it is always set and always a valid | |||
| 14 | // power of two, its minimum value is 1 which means no alignment requirements. | |||
| 15 | // | |||
| 16 | // - MaybeAlign is an optional type, it may be undefined or set. When it's set | |||
| 17 | // you can get the underlying Align type by using the getValue() method. | |||
| 18 | // | |||
| 19 | //===----------------------------------------------------------------------===// | |||
| 20 | ||||
| 21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 23 | ||||
| 24 | #include "llvm/ADT/Optional.h" | |||
| 25 | #include "llvm/Support/MathExtras.h" | |||
| 26 | #include <cassert> | |||
// Fix extraction garbling: "NDEBUG1" is the NDEBUG macro fused with its
// expansion value; <string> is only needed for debug-build diagnostics.
#ifndef NDEBUG
#include <string>
#endif // NDEBUG
| 30 | ||||
| 31 | namespace llvm { | |||
| 32 | ||||
// Debug-only guard that the scalar operand of an Align comparison is positive.
// Fix extraction garbling: the trailing "((void)0)" was the macro's NDEBUG
// expansion fused into the definition, which would make the macro ill-formed.
#define ALIGN_CHECK_ISPOSITIVE(decl)                                           \
  assert(decl > 0 && (#decl " should be defined"))
| 35 | ||||
| 36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
| 37 | /// alignment. | |||
| 38 | /// It is suitable for use as static global constants. | |||
| 39 | struct Align { | |||
| 40 | private: | |||
| 41 | uint8_t ShiftValue = 0; /// The log2 of the required alignment. | |||
| 42 | /// ShiftValue is less than 64 by construction. | |||
| 43 | ||||
| 44 | friend struct MaybeAlign; | |||
| 45 | friend unsigned Log2(Align); | |||
| 46 | friend bool operator==(Align Lhs, Align Rhs); | |||
| 47 | friend bool operator!=(Align Lhs, Align Rhs); | |||
| 48 | friend bool operator<=(Align Lhs, Align Rhs); | |||
| 49 | friend bool operator>=(Align Lhs, Align Rhs); | |||
| 50 | friend bool operator<(Align Lhs, Align Rhs); | |||
| 51 | friend bool operator>(Align Lhs, Align Rhs); | |||
| 52 | friend unsigned encode(struct MaybeAlign A); | |||
| 53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); | |||
| 54 | ||||
| 55 | /// A trivial type to allow construction of constexpr Align. | |||
| 56 | /// This is currently needed to workaround a bug in GCC 5.3 which prevents | |||
| 57 | /// definition of constexpr assign operators. | |||
| 58 | /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic | |||
| 59 | /// FIXME: Remove this, make all assign operators constexpr and introduce user | |||
| 60 | /// defined literals when we don't have to support GCC 5.3 anymore. | |||
| 61 | /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain | |||
| 62 | struct LogValue { | |||
| 63 | uint8_t Log; | |||
| 64 | }; | |||
| 65 | ||||
| 66 | public: | |||
| 67 | /// Default is byte-aligned. | |||
| 68 | constexpr Align() = default; | |||
| 69 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
| 70 | /// checks have been performed when building `Other`. | |||
| 71 | constexpr Align(const Align &Other) = default; | |||
| 72 | constexpr Align(Align &&Other) = default; | |||
| 73 | Align &operator=(const Align &Other) = default; | |||
| 74 | Align &operator=(Align &&Other) = default; | |||
| 75 | ||||
| 76 | explicit Align(uint64_t Value) { | |||
| 77 | assert(Value > 0 && "Value must not be 0")((void)0); | |||
| 78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0); | |||
| 79 | ShiftValue = Log2_64(Value); | |||
| 80 | assert(ShiftValue < 64 && "Broken invariant")((void)0); | |||
| 81 | } | |||
| 82 | ||||
| 83 | /// This is a hole in the type system and should not be abused. | |||
| 84 | /// Needed to interact with C for instance. | |||
| 85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } | |||
| ||||
| 86 | ||||
| 87 | /// Allow constructions of constexpr Align. | |||
| 88 | template <size_t kValue> constexpr static LogValue Constant() { | |||
| 89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; | |||
| 90 | } | |||
| 91 | ||||
| 92 | /// Allow constructions of constexpr Align from types. | |||
| 93 | /// Compile time equivalent to Align(alignof(T)). | |||
| 94 | template <typename T> constexpr static LogValue Of() { | |||
| 95 | return Constant<std::alignment_of<T>::value>(); | |||
| 96 | } | |||
| 97 | ||||
| 98 | /// Constexpr constructor from LogValue type. | |||
| 99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} | |||
| 100 | }; | |||
| 101 | ||||
| 102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
| 103 | inline Align assumeAligned(uint64_t Value) { | |||
| 104 | return Value ? Align(Value) : Align(); | |||
| 105 | } | |||
| 106 | ||||
| 107 | /// This struct is a compact representation of a valid (power of two) or | |||
| 108 | /// undefined (0) alignment. | |||
/// This struct is a compact representation of a valid (power of two) or
/// undefined (0) alignment.
struct MaybeAlign : public llvm::Optional<Align> {
private:
  using UP = llvm::Optional<Align>;

public:
  /// Default is undefined.
  MaybeAlign() = default;
  /// Do not perform checks in case of copy/move construct/assign, because the
  /// checks have been performed when building `Other`.
  MaybeAlign(const MaybeAlign &Other) = default;
  MaybeAlign &operator=(const MaybeAlign &Other) = default;
  MaybeAlign(MaybeAlign &&Other) = default;
  MaybeAlign &operator=(MaybeAlign &&Other) = default;

  /// Use llvm::Optional<Align> constructor.
  using UP::UP;

  // 0 maps to the undefined state; any other value must be a power of two
  // (debug-checked) and is stored as a set Align.
  explicit MaybeAlign(uint64_t Value) {
    assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&
           "Alignment is neither 0 nor a power of 2");
    if (Value)
      emplace(Value);
  }

  /// For convenience, returns a valid alignment or 1 if undefined.
  Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
};
| 136 | ||||
| 137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
| 138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
| 139 | return SizeInBytes % Lhs.value() == 0; | |||
| 140 | } | |||
| 141 | ||||
| 142 | /// Checks that Addr is a multiple of the alignment. | |||
| 143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
| 144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
| 145 | } | |||
| 146 | ||||
| 147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
| 149 | const uint64_t Value = A.value(); | |||
| 150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
| 151 | ||||
| 152 | // The division followed by a multiplication can be thought of as a right | |||
| 153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
| 154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
| 155 | // are just zero. | |||
| 156 | ||||
| 157 | // Most compilers can generate this code but the pattern may be missed when | |||
| 158 | // multiple functions gets inlined. | |||
| 159 | return (Size + Value - 1) & ~(Value - 1U); | |||
| 160 | } | |||
| 161 | ||||
| 162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
| 163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
| 164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
| 165 | /// Skew mod \p A'. | |||
| 166 | /// | |||
| 167 | /// Examples: | |||
| 168 | /// \code | |||
| 169 | /// alignTo(5, Align(8), 7) = 7 | |||
| 170 | /// alignTo(17, Align(8), 1) = 17 | |||
| 171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
| 172 | /// \endcode | |||
| 173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
| 174 | const uint64_t Value = A.value(); | |||
| 175 | Skew %= Value; | |||
| 176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
| 177 | } | |||
| 178 | ||||
| 179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 180 | /// Returns `Size` if current alignment is undefined. | |||
| 181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
| 182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
| 183 | } | |||
| 184 | ||||
/// Aligns `Addr` to `Alignment` bytes, rounding up.
inline uintptr_t alignAddr(const void *Addr, Align Alignment) {
  uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr);
  // Debug-only guard: adding (alignment - 1) to the address must not wrap
  // around the uintptr_t range, or the rounded result would be bogus.
  assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=
             ArithAddr &&
         "Overflow");
  return alignTo(ArithAddr, Alignment);
}
| 193 | ||||
| 194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
| 195 | /// or equal to \p Value and is a multiple of \p Align. | |||
| 196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
| 197 | return alignTo(Value, Alignment) - Value; | |||
| 198 | } | |||
| 199 | ||||
| 200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
| 201 | /// bytes, rounding up. | |||
| 202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
| 203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
| 204 | } | |||
| 205 | ||||
/// Returns the log2 of the alignment.
/// Reads the stored shift directly (friend access), so no log is recomputed.
inline unsigned Log2(Align A) { return A.ShiftValue; }
| 208 | ||||
| 209 | /// Returns the alignment that satisfies both alignments. | |||
| 210 | /// Same semantic as MinAlign. | |||
| 211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
| 212 | ||||
| 213 | /// Returns the alignment that satisfies both alignments. | |||
| 214 | /// Same semantic as MinAlign. | |||
| 215 | inline Align commonAlignment(Align A, uint64_t Offset) { | |||
| 216 | return Align(MinAlign(A.value(), Offset)); | |||
| 217 | } | |||
| 218 | ||||
| 219 | /// Returns the alignment that satisfies both alignments. | |||
| 220 | /// Same semantic as MinAlign. | |||
| 221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
| 222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
| 223 | } | |||
| 224 | ||||
/// Returns the alignment that satisfies both alignments.
/// Same semantic as MinAlign.
/// NOTE(review): unlike the MaybeAlign/MaybeAlign overload above, this
/// dereferences \p A unconditionally — calling it with an undefined
/// MaybeAlign is undefined behavior. Confirm all callers pass a set value.
inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
  return MaybeAlign(MinAlign((*A).value(), Offset));
}
| 230 | ||||
| 231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
| 232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
| 233 | ||||
| 234 | /// Dual operation of the encode function above. | |||
| 235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
| 236 | if (Value == 0) | |||
| 237 | return MaybeAlign(); | |||
| 238 | Align Out; | |||
| 239 | Out.ShiftValue = Value - 1; | |||
| 240 | return Out; | |||
| 241 | } | |||
| 242 | ||||
| 243 | /// Returns a representation of the alignment, the encoded value is positive by | |||
| 244 | /// definition. | |||
| 245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } | |||
| 246 | ||||
| 247 | /// Comparisons between Align and scalars. Rhs must be positive. | |||
| 248 | inline bool operator==(Align Lhs, uint64_t Rhs) { | |||
| 249 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 250 | return Lhs.value() == Rhs; | |||
| 251 | } | |||
| 252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { | |||
| 253 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 254 | return Lhs.value() != Rhs; | |||
| 255 | } | |||
| 256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { | |||
| 257 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 258 | return Lhs.value() <= Rhs; | |||
| 259 | } | |||
| 260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { | |||
| 261 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 262 | return Lhs.value() >= Rhs; | |||
| 263 | } | |||
| 264 | inline bool operator<(Align Lhs, uint64_t Rhs) { | |||
| 265 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 266 | return Lhs.value() < Rhs; | |||
| 267 | } | |||
| 268 | inline bool operator>(Align Lhs, uint64_t Rhs) { | |||
| 269 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 270 | return Lhs.value() > Rhs; | |||
| 271 | } | |||
| 272 | ||||
| 273 | /// Comparisons between MaybeAlign and scalars. | |||
| 274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
| 276 | } | |||
| 277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
| 279 | } | |||
| 280 | ||||
| 281 | /// Comparisons operators between Align. | |||
| 282 | inline bool operator==(Align Lhs, Align Rhs) { | |||
| 283 | return Lhs.ShiftValue == Rhs.ShiftValue; | |||
| 284 | } | |||
| 285 | inline bool operator!=(Align Lhs, Align Rhs) { | |||
| 286 | return Lhs.ShiftValue != Rhs.ShiftValue; | |||
| 287 | } | |||
| 288 | inline bool operator<=(Align Lhs, Align Rhs) { | |||
| 289 | return Lhs.ShiftValue <= Rhs.ShiftValue; | |||
| 290 | } | |||
| 291 | inline bool operator>=(Align Lhs, Align Rhs) { | |||
| 292 | return Lhs.ShiftValue >= Rhs.ShiftValue; | |||
| 293 | } | |||
| 294 | inline bool operator<(Align Lhs, Align Rhs) { | |||
| 295 | return Lhs.ShiftValue < Rhs.ShiftValue; | |||
| 296 | } | |||
| 297 | inline bool operator>(Align Lhs, Align Rhs) { | |||
| 298 | return Lhs.ShiftValue > Rhs.ShiftValue; | |||
| 299 | } | |||
| 300 | ||||
// Don't allow relational comparisons with MaybeAlign.
// There is no meaningful ordering between a defined and an undefined
// alignment, so every relational operator involving MaybeAlign is deleted;
// only the (in)equality operators defined above are permitted.
bool operator<=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator<(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>(Align Lhs, MaybeAlign Rhs) = delete;

bool operator<=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator<(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>(MaybeAlign Lhs, Align Rhs) = delete;

bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
| 316 | ||||
| 317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
| 318 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
| 319 | return Align(Lhs.value() * Rhs); | |||
| 320 | } | |||
| 321 | ||||
| 322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 323 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
| 324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
| 325 | } | |||
| 326 | ||||
| 327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
| 328 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
| 329 | "Divisor must be positive and a power of 2")((void)0); | |||
| 330 | assert(Lhs != 1 && "Can't halve byte alignment")((void)0); | |||
| 331 | return Align(Lhs.value() / Divisor); | |||
| 332 | } | |||
| 333 | ||||
| 334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
| 335 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
| 336 | "Divisor must be positive and a power of 2")((void)0); | |||
| 337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
| 338 | } | |||
| 339 | ||||
| 340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
| 341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
| 342 | } | |||
| 343 | ||||
| 344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
| 345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
| 346 | } | |||
| 347 | ||||
| 348 | #ifndef NDEBUG1 | |||
| 349 | // For usage in LLVM_DEBUG macros. | |||
| 350 | inline std::string DebugStr(const Align &A) { | |||
| 351 | return std::to_string(A.value()); | |||
| 352 | } | |||
| 353 | // For usage in LLVM_DEBUG macros. | |||
| 354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
| 355 | if (MA) | |||
| 356 | return std::to_string(MA->value()); | |||
| 357 | return "None"; | |||
| 358 | } | |||
| 359 | #endif // NDEBUG | |||
| 360 | ||||
| 361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
| 362 | ||||
| 363 | } // namespace llvm | |||
| 364 | ||||
| 365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |