| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PreAMXConfig.cpp |
| Warning: | line 239, column 28 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | //===- Target/X86/X86PreAMXConfig.cpp - ------------------------*- C++ -*-===// | ||||
| 2 | // | ||||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
| 6 | // | ||||
| 7 | //===----------------------------------------------------------------------===// | ||||
| 8 | // | ||||
| 9 | /// Insert tilecfg for each area of key AMX intrinsic. | ||||
| 10 | /// All the key AMX intrinsic's tile operand must come from tileload. And the | ||||
| 11 | /// def tile of key AMX intrinsic must be tilestored. | ||||
| 12 | /// take tdpbssd for example: | ||||
| 13 | /// -------------------------------------------------------------------------- | ||||
| 14 | /// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(...) key | ||||
| 15 | /// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(...) | | ||||
| 16 | /// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(...) amx | ||||
| 17 | /// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(t1, t2, t3) | | ||||
| 18 | /// call void @llvm.x86.tilestored64.internal(... td) area | ||||
| 19 | /// -------------------------------------------------------------------------- | ||||
| 20 | /// This pass will insert tilecfg before every key-amx-area, some like: | ||||
| 21 | /// -------------------------------------------------------------------------- | ||||
| 22 | /// %cfgmem = alloca <16 x i32>, align 4 * allocate mem | ||||
| 23 | /// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init | ||||
| 24 | /// ... | ||||
| 25 | /// ... pre-config shape of %t1 * | ||||
| 26 | /// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 * | ||||
| 27 | /// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config | ||||
| 28 | /// ... * | ||||
| 29 | /// ... pre-config shape of %t2 * shapes | ||||
| 30 | /// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * | ||||
| 31 | /// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 * | ||||
| 32 | /// ... | ||||
| 33 | /// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * tile config | ||||
| 34 | // | ||||
| 35 | //===----------------------------------------------------------------------===// | ||||
| 36 | // | ||||
| 37 | #include "X86.h" | ||||
| 38 | #include "llvm/ADT/SmallSet.h" | ||||
| 39 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||
| 40 | #include "llvm/CodeGen/Passes.h" | ||||
| 41 | #include "llvm/CodeGen/TargetPassConfig.h" | ||||
| 42 | #include "llvm/CodeGen/ValueTypes.h" | ||||
| 43 | #include "llvm/IR/DataLayout.h" | ||||
| 44 | #include "llvm/IR/Function.h" | ||||
| 45 | #include "llvm/IR/IRBuilder.h" | ||||
| 46 | #include "llvm/IR/Instructions.h" | ||||
| 47 | #include "llvm/IR/IntrinsicInst.h" | ||||
| 48 | #include "llvm/IR/IntrinsicsX86.h" | ||||
| 49 | #include "llvm/IR/PatternMatch.h" | ||||
| 50 | #include "llvm/InitializePasses.h" | ||||
| 51 | #include "llvm/Pass.h" | ||||
| 52 | #include "llvm/Support/raw_ostream.h" | ||||
| 53 | #include "llvm/Target/TargetMachine.h" | ||||
| 54 | |||||
| 55 | using namespace llvm; | ||||
| 56 | using namespace PatternMatch; | ||||
| 57 | |||||
// Identifier for this pass; it is also handed to the INITIALIZE_PASS_* macros
// further down as the pass's argument string.
#define DEBUG_TYPE "pre-amx-config"
| 59 | |||||
| 60 | static bool isAMXIntrinsic(IntrinsicInst *II) { | ||||
| 61 | for (Value *Operand : II->operands()) | ||||
| 62 | if (Operand->getType()->isX86_AMXTy()) | ||||
| 63 | return true; | ||||
| 64 | return II->getType()->isX86_AMXTy(); | ||||
| 65 | } | ||||
| 66 | |||||
| 67 | static bool isTileLoad(IntrinsicInst *II) { | ||||
| 68 | return II->getIntrinsicID() == Intrinsic::x86_tileloadd64_internal || | ||||
| 69 | II->getIntrinsicID() == Intrinsic::x86_tileloaddt164_internal; | ||||
| 70 | } | ||||
| 71 | |||||
| 72 | static bool isTileStore(IntrinsicInst *II) { | ||||
| 73 | return II->getIntrinsicID() == Intrinsic::x86_tilestored64_internal; | ||||
| 74 | } | ||||
| 75 | |||||
| 76 | #ifndef NDEBUG1 | ||||
| 77 | static bool onlyTileDef(IntrinsicInst *II) { | ||||
| 78 | for (Value *Operand : II->operands()) | ||||
| 79 | if (Operand->getType()->isX86_AMXTy()) | ||||
| 80 | return false; | ||||
| 81 | return II->getType()->isX86_AMXTy(); | ||||
| 82 | } | ||||
| 83 | |||||
| 84 | static bool brokenVolatile(Instruction *I) { | ||||
| 85 | // Todo: it is weak to identify a normal call here. | ||||
| 86 | if ((isa<CallInst>(I) && !isa<IntrinsicInst>(I)) || I->isTerminator()) | ||||
| 87 | return true; | ||||
| 88 | return false; | ||||
| 89 | } | ||||
| 90 | #endif | ||||
| 91 | |||||
| 92 | namespace { | ||||
| 93 | class X86PreAMXConfig { | ||||
| 94 | Function &F; | ||||
| 95 | |||||
| 96 | public: | ||||
| 97 | X86PreAMXConfig(Function &Func) : F(Func) {} | ||||
| 98 | bool preTileConfig(); | ||||
| 99 | bool addTileConfig(Instruction *ModelStart, SmallVector<Value *, 8> &Shapes); | ||||
| 100 | bool findConfigShapes( | ||||
| 101 | DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes); | ||||
| 102 | bool getKeyAMXShapes(IntrinsicInst *KeyAMX, SmallVector<Value *, 8> &Shapes); | ||||
| 103 | bool preWriteTileCfg(Value *I8Ptr, Instruction *Pos, | ||||
| 104 | SmallVector<Value *, 8> &Shapes); | ||||
| 105 | BasicBlock::iterator | ||||
| 106 | getShapesAndConfigPosEnd(BasicBlock::iterator Iter, | ||||
| 107 | SmallVector<Value *, 8> &Shapes); | ||||
| 108 | bool checkVolatileModel(SmallSet<Value *, 4> &Loads, IntrinsicInst *Store, | ||||
| 109 | IntrinsicInst *KeyAMX); | ||||
| 110 | }; | ||||
| 111 | |||||
| 112 | // Write the shapes into tilecfg's memory in order. This may not be right, | ||||
| 113 | // because the first shape may not correspond to the first tmm register, | ||||
| 114 | // so we need to handle it at X86FastTileConfig::materializeTileCfg() | ||||
| 115 | // after register allocation. | ||||
| 116 | // For example: | ||||
| 117 | // -------------------------------------------------------------------------- | ||||
| 118 | // zeroinitialize tilecfg's mem (of ldtilecfg) | ||||
| 119 | // -------------------------------------------------------------------------- | ||||
| 120 | // ... pre-config shape of %t1 * | ||||
| 121 | // %amx.tmm.0.shape.row = getelementptr i8, i8* %mem, i64 48 * | ||||
| 122 | // %amx.tmm.0.shape.col = getelementptr i16, i16* %mem, i64 16 * | ||||
| 123 | // store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 * | ||||
| 124 | // store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config | ||||
| 125 | // ... * | ||||
| 126 | // ... pre-config shape of %t2 * | ||||
| 127 | // %amx.tmm.1.shape.row = getelementptr i8, i8* %mem, i64 49 * | ||||
| 128 | // %amx.tmm.1.shape.col = getelementptr i16, i16* %mem, i64 18 * | ||||
| 129 | // store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes | ||||
| 130 | // store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 * | ||||
| 131 | // ... * | ||||
| 132 | // ... pre-config shape of %t3 * of | ||||
| 133 | // %amx.tmm.2.shape.row = getelementptr i8, i8* %mem, i64 50 * | ||||
| 134 | // %amx.tmm.2.shape.col = getelementptr i16, i16* %mem, i64 20 * | ||||
| 135 | // store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 * | ||||
| 136 | // store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 * | ||||
| 137 | // ... * tiles | ||||
| 138 | // ... pre-config shape of %td * | ||||
| 139 | // %amx.tmm.3.shape.row = getelementptr i8, i8* %mem, i64 51 * | ||||
| 140 | // %amx.tmm.3.shape.col = getelementptr i16, i16* %mem, i64 22 * | ||||
| 141 | // store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 * | ||||
| 142 | // store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 * | ||||
| 143 | // -------------------------------------------------------------------------- | ||||
| 144 | // call void @llvm.x86.ldtilecfg(i8* %mem) * tile config | ||||
| 145 | // -------------------------------------------------------------------------- | ||||
| 146 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key | ||||
| 147 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) | ||||
| 148 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx | ||||
| 149 | // %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3) | ||||
| 150 | // call void @llvm.x86.tilestored64.internal(... td) area | ||||
| 151 | // -------------------------------------------------------------------------- | ||||
| 152 | bool X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, Instruction *Pos, | ||||
| 153 | SmallVector<Value *, 8> &Shapes) { | ||||
| 154 | bool Write = false; | ||||
| 155 | LLVMContext &Ctx = Pos->getParent()->getContext(); | ||||
| 156 | Type *I8Ty = Type::getInt8Ty(Ctx); | ||||
| 157 | Type *I16Ty = Type::getInt16Ty(Ctx); | ||||
| 158 | |||||
| 159 | // TODO: Currently we defaultly set Palette = 1, it may be assigned to | ||||
| 160 | // other value in the future. | ||||
| 161 | Value *PaletteOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 0); | ||||
| 162 | Value *PaletteValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); | ||||
| 163 | Value *PalettePos = | ||||
| 164 | GetElementPtrInst::Create(I8Ty, I8Ptr, PaletteOffset, "", Pos); | ||||
| 165 | new StoreInst(PaletteValue, PalettePos, Pos); | ||||
| 166 | |||||
| 167 | for (int I = 0, E = Shapes.size() / 2; I < E; I++) { | ||||
| 168 | Value *RowOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 48 + I); | ||||
| 169 | Value *ColOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 16 + I * 2); | ||||
| 170 | const std::string ShapeName = "amx.tmm." + itostr(I); | ||||
| 171 | Value *RowPos = GetElementPtrInst::Create(I8Ty, I8Ptr, RowOffset, | ||||
| 172 | ShapeName + ".shape.row", Pos); | ||||
| 173 | Value *ColPos = GetElementPtrInst::Create(I8Ty, I8Ptr, ColOffset, "", Pos); | ||||
| 174 | ColPos = new BitCastInst(ColPos, PointerType::get(I16Ty, 0), | ||||
| 175 | ShapeName + ".shape.col", Pos); | ||||
| 176 | Value *Row = Shapes[I * 2]; | ||||
| 177 | Value *Col = Shapes[I * 2 + 1]; | ||||
| 178 | Row = new TruncInst(Row, I8Ty, "", Pos); | ||||
| 179 | new StoreInst(Row, RowPos, Pos); | ||||
| 180 | new StoreInst(Col, ColPos, Pos); | ||||
| 181 | Write = true; | ||||
| 182 | } | ||||
| 183 | return Write; | ||||
| 184 | } | ||||
| 185 | |||||
| 186 | bool X86PreAMXConfig::addTileConfig(Instruction *ModelStart, | ||||
| 187 | SmallVector<Value *, 8> &Shapes) { | ||||
| 188 | Module *M = F.getParent(); | ||||
| 189 | IRBuilder<> Builder(ModelStart); | ||||
| 190 | const DataLayout &DL = M->getDataLayout(); | ||||
| 191 | unsigned AddrSpace = DL.getAllocaAddrSpace(); | ||||
| 192 | LLVMContext &Ctx = Builder.getContext(); | ||||
| 193 | Type *V512Ty = VectorType::get(Builder.getInt32Ty(), 16, false); | ||||
| 194 | Align Alignment = DL.getPrefTypeAlign(Type::getInt32Ty(Ctx)); | ||||
| 195 | |||||
| 196 | AllocaInst *Addr = | ||||
| 197 | new AllocaInst(V512Ty, AddrSpace, "", &F.getEntryBlock().front()); | ||||
| 198 | Addr->setAlignment(Alignment); | ||||
| 199 | Value *I8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy()); | ||||
| 200 | |||||
| 201 | std::array<Value *, 1> Args = {I8Ptr}; | ||||
| 202 | Instruction *Cfg = | ||||
| 203 | Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, Args); | ||||
| 204 | |||||
| 205 | Value *Val0 = Constant::getNullValue(V512Ty); | ||||
| 206 | Instruction *Init0 = new StoreInst(Val0, Addr, false, Alignment, Cfg); | ||||
| 207 | assert(Init0 && "Not Zero initilizate the cfg mem!")((void)0); | ||||
| 208 | |||||
| 209 | preWriteTileCfg(I8Ptr, Cfg, Shapes); | ||||
| 210 | |||||
| 211 | return Init0; | ||||
| 212 | } | ||||
| 213 | |||||
| 214 | // Todo: We may need to handle "more than one store" case in the future. | ||||
| 215 | bool X86PreAMXConfig::checkVolatileModel(SmallSet<Value *, 4> &Loads, | ||||
| 216 | IntrinsicInst *Store, | ||||
| 217 | IntrinsicInst *KeyAMX) { | ||||
| 218 | Value *ST = Store->getOperand(4); | ||||
| 219 | |||||
| 220 | // Only has tileload and tilestore. | ||||
| 221 | if (!KeyAMX) | ||||
| 222 | return (Loads.size() == 1) && Loads.contains(ST); | ||||
| 223 | |||||
| 224 | // All Loads should be operands of KeyAMX. | ||||
| 225 | // All tile operands of KeyAMX should come from Loads. | ||||
| 226 | for (Value *Op : KeyAMX->operands()) { | ||||
| 227 | if (Op->getType()->isX86_AMXTy()) | ||||
| 228 | if (!Loads.erase(Op)) | ||||
| 229 | return false; | ||||
| 230 | } | ||||
| 231 | |||||
| 232 | // The def of KeyAMX should be stored into mem. | ||||
| 233 | // Todo: is it key amx can be no def? | ||||
| 234 | return Loads.empty() && (ST == cast<Value>(KeyAMX)); | ||||
| 235 | } | ||||
| 236 | |||||
| 237 | bool X86PreAMXConfig::getKeyAMXShapes(IntrinsicInst *KeyAMX, | ||||
| 238 | SmallVector<Value *, 8> &Shapes) { | ||||
| 239 | for (unsigned I = 0; I < KeyAMX->getNumOperands(); I++) { | ||||
| |||||
| 240 | Value *Op = KeyAMX->getOperand(I); | ||||
| 241 | if (!Op->getType()->isX86_AMXTy()) | ||||
| 242 | continue; | ||||
| 243 | IntrinsicInst *TileDef = dyn_cast<IntrinsicInst>(Op); | ||||
| 244 | assert((TileDef && isTileLoad(TileDef)) &&((void)0) | ||||
| 245 | "All KeyAMX's tile definiation should comes from TileLoad!")((void)0); | ||||
| 246 | Shapes.push_back(TileDef->getOperand(0)); | ||||
| 247 | Shapes.push_back(TileDef->getOperand(1)); | ||||
| 248 | } | ||||
| 249 | if (!isTileStore(KeyAMX)) { | ||||
| 250 | Shapes.push_back(KeyAMX->getOperand(0)); | ||||
| 251 | Shapes.push_back(KeyAMX->getOperand(1)); | ||||
| 252 | } | ||||
| 253 | return Shapes.size() != 0; | ||||
| 254 | } | ||||
| 255 | |||||
| 256 | // Collect the shapes and skip the area of current key amx intrinsic. | ||||
| 257 | // | ||||
| 258 | // For example: | ||||
| 259 | // ... | ||||
| 260 | // -------------------------------------------------------------------------- | ||||
| 261 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) record (m,k) | ||||
| 262 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) record (k,n) | ||||
| 263 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) record (m,n) | ||||
| 264 | // %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) | ||||
| 265 | // call void @llvm.x86.tilestored64.internal(m, n,... td) <--PosEnd record (m,n) | ||||
| 266 | // -------------------------------------------------------------------------- | ||||
| 267 | BasicBlock::iterator | ||||
| 268 | X86PreAMXConfig::getShapesAndConfigPosEnd(BasicBlock::iterator Iter, | ||||
| 269 | SmallVector<Value *, 8> &Shapes) { | ||||
| 270 | IntrinsicInst *KeyAMX = nullptr; | ||||
| 271 | BasicBlock *BB = Iter->getParent(); | ||||
| 272 | BasicBlock::iterator PosEnd = BB->end(); | ||||
| 273 | SmallSet<Value *, 4> Loads; | ||||
| 274 | |||||
| 275 | // See TileStore as "Config Position End" and check volatile model. | ||||
| 276 | for (auto I = Iter, E = BB->end(); I != E; ++I) { | ||||
| 277 | assert(!brokenVolatile(&*I) && "Not reach tile store!")((void)0); | ||||
| 278 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I); | ||||
| 279 | if (!II || !isAMXIntrinsic(II)) | ||||
| 280 | continue; | ||||
| 281 | |||||
| 282 | if (isTileLoad(II)) { | ||||
| 283 | Loads.insert(II); | ||||
| 284 | } else if (isTileStore(II)) { | ||||
| 285 | if (!checkVolatileModel(Loads, II, KeyAMX)) | ||||
| 286 | report_fatal_error("Not Volatile AMX Model!"); | ||||
| 287 | PosEnd = I; | ||||
| 288 | break; | ||||
| 289 | } else { | ||||
| 290 | assert(!KeyAMX && "Too many key amx intrinsic!")((void)0); | ||||
| 291 | KeyAMX = II; | ||||
| 292 | } | ||||
| 293 | } | ||||
| 294 | assert(PosEnd != BB->end() && "Not find TileStore!")((void)0); | ||||
| 295 | |||||
| 296 | // See KeyAMX as TileStore if only TileLoad and TileStore. | ||||
| 297 | if (!KeyAMX
| ||||
| 298 | KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd); | ||||
| 299 | |||||
| 300 | // Get Shapes in order. | ||||
| 301 | assert(Shapes.empty() && "Shapes should be clean.")((void)0); | ||||
| 302 | getKeyAMXShapes(KeyAMX, Shapes); | ||||
| 303 | |||||
| 304 | return PosEnd; | ||||
| 305 | } | ||||
| 306 | |||||
| 307 | // Record a key amx area's shapes with its position. | ||||
| 308 | // Use the first tileload as its position. | ||||
| 309 | // For example: | ||||
| 310 | // ... | ||||
| 311 | // -------------------------------------------------------------------------- | ||||
| 312 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) <-- pos | ||||
| 313 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) / | ||||
| 314 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) shapes: | ||||
| 315 | // %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) (m,k)(k,n) | ||||
| 316 | // call void @llvm.x86.tilestored64.internal(m, n,... td) (m,n)(m,n) | ||||
| 317 | // -------------------------------------------------------------------------- | ||||
| 318 | bool X86PreAMXConfig::findConfigShapes( | ||||
| 319 | DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes) { | ||||
| 320 | bool Find = false; | ||||
| 321 | for (BasicBlock &BB : F) { | ||||
| 322 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { | ||||
| 323 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I); | ||||
| 324 | if (!II
| ||||
| 325 | continue; | ||||
| 326 | if (!isAMXIntrinsic(II)) | ||||
| 327 | continue; | ||||
| 328 | assert(onlyTileDef(II) && "Not volatile model for AMX at O0!")((void)0); | ||||
| 329 | |||||
| 330 | I = getShapesAndConfigPosEnd(I, PosAndShapes[&*I]); | ||||
| 331 | Find = true; | ||||
| 332 | } | ||||
| 333 | } | ||||
| 334 | return Find; | ||||
| 335 | } | ||||
| 336 | |||||
| 337 | // Insert ldtilecfg and preconfig the shapes for each area of key AMX intrinsic. | ||||
| 338 | // e.g. (key amx = tdpbssd) | ||||
| 339 | // -------------------------------------------------------------------------- | ||||
| 340 | // %cfgmem = alloca <16 x i32>, align 4 * allocate mem | ||||
| 341 | // store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init | ||||
| 342 | // ... | ||||
| 343 | // ... pre-config shape of %t1 * | ||||
| 344 | // store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 * | ||||
| 345 | // store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config | ||||
| 346 | // ... * | ||||
| 347 | // ... pre-config shape of %t2 * | ||||
| 348 | // store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes | ||||
| 349 | // store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 * | ||||
| 350 | // ... * | ||||
| 351 | // ... pre-config shape of %t3 * of | ||||
| 352 | // store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 * | ||||
| 353 | // store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 * | ||||
| 354 | // ... * tiles | ||||
| 355 | // ... pre-config shape of %td * | ||||
| 356 | // store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 * | ||||
| 357 | // store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 * | ||||
| 358 | // | ||||
| 359 | // call void @llvm.x86.ldtilecfg(i8* %cfgmem) * pre-config | ||||
| 360 | // -------------------------------------------------------------------------- | ||||
| 361 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key | ||||
| 362 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) | ||||
| 363 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx | ||||
| 364 | // %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3) | ||||
| 365 | // call void @llvm.x86.tilestored64.internal(... td) area | ||||
| 366 | // -------------------------------------------------------------------------- | ||||
| 367 | bool X86PreAMXConfig::preTileConfig() { | ||||
| 368 | DenseMap<Instruction *, SmallVector<Value *, 8>> PosAndShapes; | ||||
| 369 | bool NeedCfg = findConfigShapes(PosAndShapes); | ||||
| 370 | if (!NeedCfg) | ||||
| 371 | return false; | ||||
| 372 | for (auto &IPAndShapes : PosAndShapes) | ||||
| 373 | addTileConfig(IPAndShapes.first, IPAndShapes.second); | ||||
| 374 | |||||
| 375 | return true; | ||||
| 376 | } | ||||
| 377 | } // anonymous namespace | ||||
| 378 | |||||
| 379 | namespace { | ||||
| 380 | |||||
| 381 | class X86PreAMXConfigPass : public FunctionPass { | ||||
| 382 | public: | ||||
| 383 | static char ID; | ||||
| 384 | |||||
| 385 | X86PreAMXConfigPass() : FunctionPass(ID) { | ||||
| 386 | initializeX86PreAMXConfigPassPass(*PassRegistry::getPassRegistry()); | ||||
| 387 | } | ||||
| 388 | |||||
| 389 | bool runOnFunction(Function &F) override { | ||||
| 390 | TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); | ||||
| 391 | bool C = false; | ||||
| 392 | |||||
| 393 | // Prepare for fast register allocation at O0. | ||||
| 394 | if (TM->getOptLevel() == CodeGenOpt::None) { | ||||
| |||||
| 395 | |||||
| 396 | // We pre-config each key AMX intrinsic at O0. | ||||
| 397 | // In theory, one tile config can cover several AMX intrinsics, but | ||||
| 398 | // it is very diffcult to classify the tile shapes at O0. So here we | ||||
| 399 | // let thing be easy, pre-config every key AMX intrinsic. | ||||
| 400 | X86PreAMXConfig PCFG(F); | ||||
| 401 | C = PCFG.preTileConfig(); | ||||
| 402 | } | ||||
| 403 | |||||
| 404 | return C; | ||||
| 405 | } | ||||
| 406 | |||||
| 407 | void getAnalysisUsage(AnalysisUsage &AU) const override { | ||||
| 408 | AU.setPreservesCFG(); | ||||
| 409 | AU.addRequired<TargetPassConfig>(); | ||||
| 410 | } | ||||
| 411 | }; | ||||
| 412 | |||||
| 413 | } // anonymous namespace | ||||
| 414 | |||||
| 415 | static const char PassName[] = "Pre AMX Tile Config"; | ||||
| 416 | char X86PreAMXConfigPass::ID = 0; | ||||
| 417 | INITIALIZE_PASS_BEGIN(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)static void *initializeX86PreAMXConfigPassPassOnce(PassRegistry &Registry) { | ||||
| 418 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)initializeTargetPassConfigPass(Registry); | ||||
| 419 | INITIALIZE_PASS_END(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)PassInfo *PI = new PassInfo( PassName, "pre-amx-config", & X86PreAMXConfigPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <X86PreAMXConfigPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeX86PreAMXConfigPassPassFlag ; void llvm::initializeX86PreAMXConfigPassPass(PassRegistry & Registry) { llvm::call_once(InitializeX86PreAMXConfigPassPassFlag , initializeX86PreAMXConfigPassPassOnce, std::ref(Registry)); } | ||||
| 420 | |||||
| 421 | FunctionPass *llvm::createX86PreAMXConfigPass() { | ||||
| 422 | return new X86PreAMXConfigPass(); | ||||
| 423 | } |
| 1 | //===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_ADT_ILIST_ITERATOR_H |
| 10 | #define LLVM_ADT_ILIST_ITERATOR_H |
| 11 | |
| 12 | #include "llvm/ADT/ilist_node.h" |
| 13 | #include <cassert> |
| 14 | #include <cstddef> |
| 15 | #include <iterator> |
| 16 | #include <type_traits> |
| 17 | |
| 18 | namespace llvm { |
| 19 | |
| 20 | namespace ilist_detail { |
| 21 | |
| 22 | /// Find const-correct node types. |
| 23 | template <class OptionsT, bool IsConst> struct IteratorTraits; |
| 24 | template <class OptionsT> struct IteratorTraits<OptionsT, false> { |
| 25 | using value_type = typename OptionsT::value_type; |
| 26 | using pointer = typename OptionsT::pointer; |
| 27 | using reference = typename OptionsT::reference; |
| 28 | using node_pointer = ilist_node_impl<OptionsT> *; |
| 29 | using node_reference = ilist_node_impl<OptionsT> &; |
| 30 | }; |
| 31 | template <class OptionsT> struct IteratorTraits<OptionsT, true> { |
| 32 | using value_type = const typename OptionsT::value_type; |
| 33 | using pointer = typename OptionsT::const_pointer; |
| 34 | using reference = typename OptionsT::const_reference; |
| 35 | using node_pointer = const ilist_node_impl<OptionsT> *; |
| 36 | using node_reference = const ilist_node_impl<OptionsT> &; |
| 37 | }; |
| 38 | |
| 39 | template <bool IsReverse> struct IteratorHelper; |
| 40 | template <> struct IteratorHelper<false> : ilist_detail::NodeAccess { |
| 41 | using Access = ilist_detail::NodeAccess; |
| 42 | |
| 43 | template <class T> static void increment(T *&I) { I = Access::getNext(*I); } |
| 44 | template <class T> static void decrement(T *&I) { I = Access::getPrev(*I); } |
| 45 | }; |
| 46 | template <> struct IteratorHelper<true> : ilist_detail::NodeAccess { |
| 47 | using Access = ilist_detail::NodeAccess; |
| 48 | |
| 49 | template <class T> static void increment(T *&I) { I = Access::getPrev(*I); } |
| 50 | template <class T> static void decrement(T *&I) { I = Access::getNext(*I); } |
| 51 | }; |
| 52 | |
| 53 | } // end namespace ilist_detail |
| 54 | |
| 55 | /// Iterator for intrusive lists based on ilist_node. |
| 56 | template <class OptionsT, bool IsReverse, bool IsConst> |
| 57 | class ilist_iterator : ilist_detail::SpecificNodeAccess<OptionsT> { |
| 58 | friend ilist_iterator<OptionsT, IsReverse, !IsConst>; |
| 59 | friend ilist_iterator<OptionsT, !IsReverse, IsConst>; |
| 60 | friend ilist_iterator<OptionsT, !IsReverse, !IsConst>; |
| 61 | |
| 62 | using Traits = ilist_detail::IteratorTraits<OptionsT, IsConst>; |
| 63 | using Access = ilist_detail::SpecificNodeAccess<OptionsT>; |
| 64 | |
| 65 | public: |
| 66 | using value_type = typename Traits::value_type; |
| 67 | using pointer = typename Traits::pointer; |
| 68 | using reference = typename Traits::reference; |
| 69 | using difference_type = ptrdiff_t; |
| 70 | using iterator_category = std::bidirectional_iterator_tag; |
| 71 | using const_pointer = typename OptionsT::const_pointer; |
| 72 | using const_reference = typename OptionsT::const_reference; |
| 73 | |
| 74 | private: |
| 75 | using node_pointer = typename Traits::node_pointer; |
| 76 | using node_reference = typename Traits::node_reference; |
| 77 | |
| 78 | node_pointer NodePtr = nullptr; |
| 79 | |
| 80 | public: |
| 81 | /// Create from an ilist_node. |
| 82 | explicit ilist_iterator(node_reference N) : NodePtr(&N) {} |
| 83 | |
| 84 | explicit ilist_iterator(pointer NP) : NodePtr(Access::getNodePtr(NP)) {} |
| 85 | explicit ilist_iterator(reference NR) : NodePtr(Access::getNodePtr(&NR)) {} |
| 86 | ilist_iterator() = default; |
| 87 | |
| 88 | // This is templated so that we can allow constructing a const iterator from |
| 89 | // a nonconst iterator... |
| 90 | template <bool RHSIsConst> |
| 91 | ilist_iterator(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS, |
| 92 | std::enable_if_t<IsConst || !RHSIsConst, void *> = nullptr) |
| 93 | : NodePtr(RHS.NodePtr) {} |
| 94 | |
| 95 | // This is templated so that we can allow assigning to a const iterator from |
| 96 | // a nonconst iterator... |
| 97 | template <bool RHSIsConst> |
| 98 | std::enable_if_t<IsConst || !RHSIsConst, ilist_iterator &> |
| 99 | operator=(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS) { |
| 100 | NodePtr = RHS.NodePtr; |
| 101 | return *this; |
| 102 | } |
| 103 | |
| 104 | /// Explicit conversion between forward/reverse iterators. |
| 105 | /// |
| 106 | /// Translate between forward and reverse iterators without changing range |
| 107 | /// boundaries. The resulting iterator will dereference (and have a handle) |
| 108 | /// to the previous node, which is somewhat unexpected; but converting the |
| 109 | /// two endpoints in a range will give the same range in reverse. |
| 110 | /// |
| 111 | /// This matches std::reverse_iterator conversions. |
| 112 | explicit ilist_iterator( |
| 113 | const ilist_iterator<OptionsT, !IsReverse, IsConst> &RHS) |
| 114 | : ilist_iterator(++RHS.getReverse()) {} |
| 115 | |
| 116 | /// Get a reverse iterator to the same node. |
| 117 | /// |
| 118 | /// Gives a reverse iterator that will dereference (and have a handle) to the |
| 119 | /// same node. Converting the endpoint iterators in a range will give a |
| 120 | /// different range; for range operations, use the explicit conversions. |
| 121 | ilist_iterator<OptionsT, !IsReverse, IsConst> getReverse() const { |
| 122 | if (NodePtr) |
| 123 | return ilist_iterator<OptionsT, !IsReverse, IsConst>(*NodePtr); |
| 124 | return ilist_iterator<OptionsT, !IsReverse, IsConst>(); |
| 125 | } |
| 126 | |
| 127 | /// Const-cast. |
| 128 | ilist_iterator<OptionsT, IsReverse, false> getNonConst() const { |
| 129 | if (NodePtr) |
| 130 | return ilist_iterator<OptionsT, IsReverse, false>( |
| 131 | const_cast<typename ilist_iterator<OptionsT, IsReverse, |
| 132 | false>::node_reference>(*NodePtr)); |
| 133 | return ilist_iterator<OptionsT, IsReverse, false>(); |
| 134 | } |
| 135 | |
| 136 | // Accessors... |
| 137 | reference operator*() const { |
| 138 | assert(!NodePtr->isKnownSentinel())((void)0); |
| 139 | return *Access::getValuePtr(NodePtr); |
| 140 | } |
| 141 | pointer operator->() const { return &operator*(); } |
| 142 | |
| 143 | // Comparison operators |
| 144 | friend bool operator==(const ilist_iterator &LHS, const ilist_iterator &RHS) { |
| 145 | return LHS.NodePtr == RHS.NodePtr; |
| 146 | } |
| 147 | friend bool operator!=(const ilist_iterator &LHS, const ilist_iterator &RHS) { |
| 148 | return LHS.NodePtr != RHS.NodePtr; |
| 149 | } |
| 150 | |
| 151 | // Increment and decrement operators... |
| 152 | ilist_iterator &operator--() { |
| 153 | NodePtr = IsReverse ? NodePtr->getNext() : NodePtr->getPrev(); |
| 154 | return *this; |
| 155 | } |
| 156 | ilist_iterator &operator++() { |
| 157 | NodePtr = IsReverse ? NodePtr->getPrev() : NodePtr->getNext(); |
| 158 | return *this; |
| 159 | } |
| 160 | ilist_iterator operator--(int) { |
| 161 | ilist_iterator tmp = *this; |
| 162 | --*this; |
| 163 | return tmp; |
| 164 | } |
| 165 | ilist_iterator operator++(int) { |
| 166 | ilist_iterator tmp = *this; |
| 167 | ++*this; |
| 168 | return tmp; |
| 169 | } |
| 170 | |
| 171 | /// Get the underlying ilist_node. |
| 172 | node_pointer getNodePtr() const { return static_cast<node_pointer>(NodePtr); } |
| 173 | |
| 174 | /// Check for end. Only valid if ilist_sentinel_tracking<true>. |
| 175 | bool isEnd() const { return NodePtr ? NodePtr->isSentinel() : false; } |
| 176 | }; |
| 177 | |
| 178 | template <typename From> struct simplify_type; |
| 179 | |
| 180 | /// Allow ilist_iterators to convert into pointers to a node automatically when |
| 181 | /// used by the dyn_cast, cast, isa mechanisms... |
| 182 | /// |
| 183 | /// FIXME: remove this, since there is no implicit conversion to NodeTy. |
| 184 | template <class OptionsT, bool IsConst> |
| 185 | struct simplify_type<ilist_iterator<OptionsT, false, IsConst>> { |
| 186 | using iterator = ilist_iterator<OptionsT, false, IsConst>; |
| 187 | using SimpleType = typename iterator::pointer; |
| 188 | |
| 189 | static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; } |
| 190 | }; |
| 191 | template <class OptionsT, bool IsConst> |
| 192 | struct simplify_type<const ilist_iterator<OptionsT, false, IsConst>> |
| 193 | : simplify_type<ilist_iterator<OptionsT, false, IsConst>> {}; |
| 194 | |
| 195 | } // end namespace llvm |
| 196 | |
| 197 | #endif // LLVM_ADT_ILIST_ITERATOR_H |