File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PreAMXConfig.cpp
Warning: line 239, column 28: Called C++ object pointer is null
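The flagged call is KeyAMX->getNumOperands() in X86PreAMXConfig::getKeyAMXShapes (source line 239 in the listing below). One path consistent with the diagnostic: getShapesAndConfigPosEnd() finds no tile store, so PosEnd stays at BB->end(); KeyAMX is then taken from dyn_cast<IntrinsicInst>(&*PosEnd), which can yield null, and the assert() guards along the way give no run-time protection in an NDEBUG build. Below is a minimal, self-contained sketch of that pattern; the types and the dynCastIntrinsic helper are stand-ins for illustration, not LLVM code.

    #include <cassert>
    #include <cstdio>

    struct Inst { virtual ~Inst() = default; };
    struct IntrinsicInst : Inst {
      unsigned getNumOperands() const { return 3; }
    };

    // Stand-in for llvm::dyn_cast<IntrinsicInst>: null when the dynamic type differs.
    IntrinsicInst *dynCastIntrinsic(Inst *I) { return dynamic_cast<IntrinsicInst *>(I); }

    // Mirrors the shape of getKeyAMXShapes at line 239 below:
    // the parameter is dereferenced without a null check.
    unsigned getKeyAMXShapes(IntrinsicInst *KeyAMX) {
      return KeyAMX->getNumOperands();
    }

    int main() {
      Inst NotAnIntrinsic;                                        // e.g. a block with no real key AMX intrinsic
      IntrinsicInst *KeyAMX = dynCastIntrinsic(&NotAnIntrinsic);  // yields nullptr
      assert(KeyAMX && "expected an intrinsic");                  // compiled out when NDEBUG is defined
      std::printf("%u\n", getKeyAMXShapes(KeyAMX));               // null dereference on the NDEBUG path
      return 0;
    }

An explicit check before the dereference (for example, failing with report_fatal_error when no tile store or no key intrinsic is found) would close this path even with asserts compiled out; whether upstream changed the code that way is not shown here.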
1 | //===- Target/X86/X86PreAMXConfig.cpp - ------------------------*- C++ -*-===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | /// Insert tilecfg for each area of key AMX intrinsic. | ||||
10 | /// All the key AMX intrinsic's tile operand must come from tileload. And the | ||||
11 | /// def tile of key AMX intrinsic must be tilestored. | ||||
12 | /// take tdpbssd for example: | ||||
13 | /// -------------------------------------------------------------------------- | ||||
14 | /// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(...) key | ||||
15 | /// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(...) | | ||||
16 | /// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(...) amx | ||||
17 | /// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(t1, t2, t3) | | ||||
18 | /// call void @llvm.x86.tilestored64.internal(... td) area | ||||
19 | /// -------------------------------------------------------------------------- | ||||
20 | /// This pass will insert tilecfg before every key-amx-area, something like: | ||||
21 | /// -------------------------------------------------------------------------- | ||||
22 | /// %cfgmem = alloca <16 x i32>, align 4 * allocate mem | ||||
23 | /// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init | ||||
24 | /// ... | ||||
25 | /// ... pre-config shape of %t1 * | ||||
26 | /// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 * | ||||
27 | /// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config | ||||
28 | /// ... * | ||||
29 | /// ... pre-config shape of %t2 * shapes | ||||
30 | /// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * | ||||
31 | /// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 * | ||||
32 | /// ... | ||||
33 | /// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * tile config | ||||
34 | // | ||||
35 | //===----------------------------------------------------------------------===// | ||||
36 | // | ||||
37 | #include "X86.h" | ||||
38 | #include "llvm/ADT/SmallSet.h" | ||||
39 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||
40 | #include "llvm/CodeGen/Passes.h" | ||||
41 | #include "llvm/CodeGen/TargetPassConfig.h" | ||||
42 | #include "llvm/CodeGen/ValueTypes.h" | ||||
43 | #include "llvm/IR/DataLayout.h" | ||||
44 | #include "llvm/IR/Function.h" | ||||
45 | #include "llvm/IR/IRBuilder.h" | ||||
46 | #include "llvm/IR/Instructions.h" | ||||
47 | #include "llvm/IR/IntrinsicInst.h" | ||||
48 | #include "llvm/IR/IntrinsicsX86.h" | ||||
49 | #include "llvm/IR/PatternMatch.h" | ||||
50 | #include "llvm/InitializePasses.h" | ||||
51 | #include "llvm/Pass.h" | ||||
52 | #include "llvm/Support/raw_ostream.h" | ||||
53 | #include "llvm/Target/TargetMachine.h" | ||||
54 | |||||
55 | using namespace llvm; | ||||
56 | using namespace PatternMatch; | ||||
57 | |||||
58 | #define DEBUG_TYPE "pre-amx-config" | ||||
59 | |||||
60 | static bool isAMXIntrinsic(IntrinsicInst *II) { | ||||
61 | for (Value *Operand : II->operands()) | ||||
62 | if (Operand->getType()->isX86_AMXTy()) | ||||
63 | return true; | ||||
64 | return II->getType()->isX86_AMXTy(); | ||||
65 | } | ||||
66 | |||||
67 | static bool isTileLoad(IntrinsicInst *II) { | ||||
68 | return II->getIntrinsicID() == Intrinsic::x86_tileloadd64_internal || | ||||
69 | II->getIntrinsicID() == Intrinsic::x86_tileloaddt164_internal; | ||||
70 | } | ||||
71 | |||||
72 | static bool isTileStore(IntrinsicInst *II) { | ||||
73 | return II->getIntrinsicID() == Intrinsic::x86_tilestored64_internal; | ||||
74 | } | ||||
75 | |||||
76 | #ifndef NDEBUG | ||||
77 | static bool onlyTileDef(IntrinsicInst *II) { | ||||
78 | for (Value *Operand : II->operands()) | ||||
79 | if (Operand->getType()->isX86_AMXTy()) | ||||
80 | return false; | ||||
81 | return II->getType()->isX86_AMXTy(); | ||||
82 | } | ||||
83 | |||||
84 | static bool brokenVolatile(Instruction *I) { | ||||
85 | // Todo: it is weak to identify a normal call here. | ||||
86 | if ((isa<CallInst>(I) && !isa<IntrinsicInst>(I)) || I->isTerminator()) | ||||
87 | return true; | ||||
88 | return false; | ||||
89 | } | ||||
90 | #endif | ||||
91 | |||||
92 | namespace { | ||||
93 | class X86PreAMXConfig { | ||||
94 | Function &F; | ||||
95 | |||||
96 | public: | ||||
97 | X86PreAMXConfig(Function &Func) : F(Func) {} | ||||
98 | bool preTileConfig(); | ||||
99 | bool addTileConfig(Instruction *ModelStart, SmallVector<Value *, 8> &Shapes); | ||||
100 | bool findConfigShapes( | ||||
101 | DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes); | ||||
102 | bool getKeyAMXShapes(IntrinsicInst *KeyAMX, SmallVector<Value *, 8> &Shapes); | ||||
103 | bool preWriteTileCfg(Value *I8Ptr, Instruction *Pos, | ||||
104 | SmallVector<Value *, 8> &Shapes); | ||||
105 | BasicBlock::iterator | ||||
106 | getShapesAndConfigPosEnd(BasicBlock::iterator Iter, | ||||
107 | SmallVector<Value *, 8> &Shapes); | ||||
108 | bool checkVolatileModel(SmallSet<Value *, 4> &Loads, IntrinsicInst *Store, | ||||
109 | IntrinsicInst *KeyAMX); | ||||
110 | }; | ||||
111 | |||||
112 | // Orderly write the shapes in tilecfg's mem. This may not be right: | ||||
113 | // the first shape may not correspond to the first tmm register, so we | ||||
114 | // need to handle it at X86FastTileConfig::materializeTileCfg() after | ||||
115 | // register allocation. | ||||
116 | // For example: | ||||
117 | // -------------------------------------------------------------------------- | ||||
118 | // zeroinitialize tilecfg's mem (of ldtilecfg) | ||||
119 | // -------------------------------------------------------------------------- | ||||
120 | // ... pre-config shape of %t1 * | ||||
121 | // %amx.tmm.0.shape.row = getelementptr i8, i8* %mem, i64 48 * | ||||
122 | // %amx.tmm.0.shape.col = getelementptr i16, i16* %mem, i64 16 * | ||||
123 | // store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 * | ||||
124 | // store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config | ||||
125 | // ... * | ||||
126 | // ... pre-config shape of %t2 * | ||||
127 | // %amx.tmm.1.shape.row = getelementptr i8, i8* %mem, i64 49 * | ||||
128 | // %amx.tmm.1.shape.col = getelementptr i16, i16* %mem, i64 18 * | ||||
129 | // store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes | ||||
130 | // store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 * | ||||
131 | // ... * | ||||
132 | // ... pre-config shape of %t3 * of | ||||
133 | // %amx.tmm.2.shape.row = getelementptr i8, i8* %mem, i64 50 * | ||||
134 | // %amx.tmm.2.shape.col = getelementptr i16, i16* %mem, i64 20 * | ||||
135 | // store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 * | ||||
136 | // store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 * | ||||
137 | // ... * tiles | ||||
138 | // ... pre-config shape of %td * | ||||
139 | // %amx.tmm.3.shape.row = getelementptr i8, i8* %mem, i64 51 * | ||||
140 | // %amx.tmm.3.shape.col = getelementptr i16, i16* %mem, i64 22 * | ||||
141 | // store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 * | ||||
142 | // store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 * | ||||
143 | // -------------------------------------------------------------------------- | ||||
144 | // call void @llvm.x86.ldtilecfg(i8* %mem) * tile config | ||||
145 | // -------------------------------------------------------------------------- | ||||
146 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key | ||||
147 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) | ||||
148 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx | ||||
149 | // %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3) | ||||
150 | // call void @llvm.x86.tilestored64.internal(... td) area | ||||
151 | // -------------------------------------------------------------------------- | ||||
152 | bool X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, Instruction *Pos, | ||||
153 | SmallVector<Value *, 8> &Shapes) { | ||||
154 | bool Write = false; | ||||
155 | LLVMContext &Ctx = Pos->getParent()->getContext(); | ||||
156 | Type *I8Ty = Type::getInt8Ty(Ctx); | ||||
157 | Type *I16Ty = Type::getInt16Ty(Ctx); | ||||
158 | |||||
159 | // TODO: Currently we default to Palette = 1; it may be assigned a | ||||
160 | // different value in the future. | ||||
161 | Value *PaletteOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 0); | ||||
162 | Value *PaletteValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); | ||||
163 | Value *PalettePos = | ||||
164 | GetElementPtrInst::Create(I8Ty, I8Ptr, PaletteOffset, "", Pos); | ||||
165 | new StoreInst(PaletteValue, PalettePos, Pos); | ||||
166 | |||||
167 | for (int I = 0, E = Shapes.size() / 2; I < E; I++) { | ||||
168 | Value *RowOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 48 + I); | ||||
169 | Value *ColOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 16 + I * 2); | ||||
170 | const std::string ShapeName = "amx.tmm." + itostr(I); | ||||
171 | Value *RowPos = GetElementPtrInst::Create(I8Ty, I8Ptr, RowOffset, | ||||
172 | ShapeName + ".shape.row", Pos); | ||||
173 | Value *ColPos = GetElementPtrInst::Create(I8Ty, I8Ptr, ColOffset, "", Pos); | ||||
174 | ColPos = new BitCastInst(ColPos, PointerType::get(I16Ty, 0), | ||||
175 | ShapeName + ".shape.col", Pos); | ||||
176 | Value *Row = Shapes[I * 2]; | ||||
177 | Value *Col = Shapes[I * 2 + 1]; | ||||
178 | Row = new TruncInst(Row, I8Ty, "", Pos); | ||||
179 | new StoreInst(Row, RowPos, Pos); | ||||
180 | new StoreInst(Col, ColPos, Pos); | ||||
181 | Write = true; | ||||
182 | } | ||||
183 | return Write; | ||||
184 | } | ||||
185 | |||||
186 | bool X86PreAMXConfig::addTileConfig(Instruction *ModelStart, | ||||
187 | SmallVector<Value *, 8> &Shapes) { | ||||
188 | Module *M = F.getParent(); | ||||
189 | IRBuilder<> Builder(ModelStart); | ||||
190 | const DataLayout &DL = M->getDataLayout(); | ||||
191 | unsigned AddrSpace = DL.getAllocaAddrSpace(); | ||||
192 | LLVMContext &Ctx = Builder.getContext(); | ||||
193 | Type *V512Ty = VectorType::get(Builder.getInt32Ty(), 16, false); | ||||
194 | Align Alignment = DL.getPrefTypeAlign(Type::getInt32Ty(Ctx)); | ||||
195 | |||||
196 | AllocaInst *Addr = | ||||
197 | new AllocaInst(V512Ty, AddrSpace, "", &F.getEntryBlock().front()); | ||||
198 | Addr->setAlignment(Alignment); | ||||
199 | Value *I8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy()); | ||||
200 | |||||
201 | std::array<Value *, 1> Args = {I8Ptr}; | ||||
202 | Instruction *Cfg = | ||||
203 | Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, Args); | ||||
204 | |||||
205 | Value *Val0 = Constant::getNullValue(V512Ty); | ||||
206 | Instruction *Init0 = new StoreInst(Val0, Addr, false, Alignment, Cfg); | ||||
207 | assert(Init0 && "Not Zero initilizate the cfg mem!"); | ||||
208 | |||||
209 | preWriteTileCfg(I8Ptr, Cfg, Shapes); | ||||
210 | |||||
211 | return Init0; | ||||
212 | } | ||||
213 | |||||
214 | // Todo: We may need to handle "more than one store" case in the future. | ||||
215 | bool X86PreAMXConfig::checkVolatileModel(SmallSet<Value *, 4> &Loads, | ||||
216 | IntrinsicInst *Store, | ||||
217 | IntrinsicInst *KeyAMX) { | ||||
218 | Value *ST = Store->getOperand(4); | ||||
219 | |||||
220 | // Only has tileload and tilestore. | ||||
221 | if (!KeyAMX) | ||||
222 | return (Loads.size() == 1) && Loads.contains(ST); | ||||
223 | |||||
224 | // All Loads should be operands of KeyAMX. | ||||
225 | // All tile operands of KeyAMX should come from Loads. | ||||
226 | for (Value *Op : KeyAMX->operands()) { | ||||
227 | if (Op->getType()->isX86_AMXTy()) | ||||
228 | if (!Loads.erase(Op)) | ||||
229 | return false; | ||||
230 | } | ||||
231 | |||||
232 | // The def of KeyAMX should be stored into mem. | ||||
233 | // Todo: can the key amx have no def? | ||||
234 | return Loads.empty() && (ST == cast<Value>(KeyAMX)); | ||||
235 | } | ||||
236 | |||||
237 | bool X86PreAMXConfig::getKeyAMXShapes(IntrinsicInst *KeyAMX, | ||||
238 | SmallVector<Value *, 8> &Shapes) { | ||||
239 | for (unsigned I = 0; I < KeyAMX->getNumOperands(); I++) { | ||||
240 | Value *Op = KeyAMX->getOperand(I); | ||||
241 | if (!Op->getType()->isX86_AMXTy()) | ||||
242 | continue; | ||||
243 | IntrinsicInst *TileDef = dyn_cast<IntrinsicInst>(Op); | ||||
244 | assert((TileDef && isTileLoad(TileDef)) && | ||||
245 | "All KeyAMX's tile definiation should comes from TileLoad!"); | ||||
246 | Shapes.push_back(TileDef->getOperand(0)); | ||||
247 | Shapes.push_back(TileDef->getOperand(1)); | ||||
248 | } | ||||
249 | if (!isTileStore(KeyAMX)) { | ||||
250 | Shapes.push_back(KeyAMX->getOperand(0)); | ||||
251 | Shapes.push_back(KeyAMX->getOperand(1)); | ||||
252 | } | ||||
253 | return Shapes.size() != 0; | ||||
254 | } | ||||
255 | |||||
256 | // Collect the shapes and skip the area of current key amx intrinsic. | ||||
257 | // | ||||
258 | // For example: | ||||
259 | // ... | ||||
260 | // -------------------------------------------------------------------------- | ||||
261 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) record (m,k) | ||||
262 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) record (m,k) | ||||
263 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) record (m,k) | ||||
264 | // %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) | ||||
265 | // call void @llvm.x86.tilestored64.internal(m, n,... td) <--PosEnd record (m,k) | ||||
266 | // -------------------------------------------------------------------------- | ||||
267 | BasicBlock::iterator | ||||
268 | X86PreAMXConfig::getShapesAndConfigPosEnd(BasicBlock::iterator Iter, | ||||
269 | SmallVector<Value *, 8> &Shapes) { | ||||
270 | IntrinsicInst *KeyAMX = nullptr; | ||||
271 | BasicBlock *BB = Iter->getParent(); | ||||
272 | BasicBlock::iterator PosEnd = BB->end(); | ||||
273 | SmallSet<Value *, 4> Loads; | ||||
274 | |||||
275 | // See TileStore as "Config Position End" and check volatile model. | ||||
276 | for (auto I = Iter, E = BB->end(); I != E; ++I) { | ||||
277 | assert(!brokenVolatile(&*I) && "Not reach tile store!"); | ||||
278 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I); | ||||
279 | if (!II || !isAMXIntrinsic(II)) | ||||
280 | continue; | ||||
281 | |||||
282 | if (isTileLoad(II)) { | ||||
283 | Loads.insert(II); | ||||
284 | } else if (isTileStore(II)) { | ||||
285 | if (!checkVolatileModel(Loads, II, KeyAMX)) | ||||
286 | report_fatal_error("Not Volatile AMX Model!"); | ||||
287 | PosEnd = I; | ||||
288 | break; | ||||
289 | } else { | ||||
290 | assert(!KeyAMX && "Too many key amx intrinsic!"); | ||||
291 | KeyAMX = II; | ||||
292 | } | ||||
293 | } | ||||
294 | assert(PosEnd != BB->end() && "Not find TileStore!"); | ||||
295 | |||||
296 | // See KeyAMX as TileStore if only TileLoad and TileStore. | ||||
297 | if (!KeyAMX) | ||||
298 | KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd); | ||||
299 | |||||
300 | // Get Shapes in order. | ||||
301 | assert(Shapes.empty() && "Shapes should be clean."); | ||||
302 | getKeyAMXShapes(KeyAMX, Shapes); | ||||
303 | |||||
304 | return PosEnd; | ||||
305 | } | ||||
306 | |||||
307 | // Record a key amx area's shapes with its position. | ||||
308 | // Use the first tileload as its position. | ||||
309 | // For example: | ||||
310 | // ... | ||||
311 | // -------------------------------------------------------------------------- | ||||
312 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) <-- pos | ||||
313 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) / | ||||
314 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) shapes: | ||||
315 | // %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) (m,k)(k,n) | ||||
316 | // call void @llvm.x86.tilestored64.internal(m, n,... td) (m,n)(m,n) | ||||
317 | // -------------------------------------------------------------------------- | ||||
318 | bool X86PreAMXConfig::findConfigShapes( | ||||
319 | DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes) { | ||||
320 | bool Find = false; | ||||
321 | for (BasicBlock &BB : F) { | ||||
322 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { | ||||
323 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I); | ||||
324 | if (!II) | ||||
325 | continue; | ||||
326 | if (!isAMXIntrinsic(II)) | ||||
327 | continue; | ||||
328 | assert(onlyTileDef(II) && "Not volatile model for AMX at O0!"); | ||||
329 | |||||
330 | I = getShapesAndConfigPosEnd(I, PosAndShapes[&*I]); | ||||
331 | Find = true; | ||||
332 | } | ||||
333 | } | ||||
334 | return Find; | ||||
335 | } | ||||
336 | |||||
337 | // Insert ldtilecfg and preconfig the shapes for each area of key AMX intrinsic. | ||||
338 | // e.g. (key amx = tdpbssd) | ||||
339 | // -------------------------------------------------------------------------- | ||||
340 | // %cfgmem = alloca <16 x i32>, align 4 * allocate mem | ||||
341 | // store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init | ||||
342 | // ... | ||||
343 | // ... pre-config shape of %t1 * | ||||
344 | // store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 * | ||||
345 | // store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config | ||||
346 | // ... * | ||||
347 | // ... pre-config shape of %t2 * | ||||
348 | // store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes | ||||
349 | // store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 * | ||||
350 | // ... * | ||||
351 | // ... pre-config shape of %t3 * of | ||||
352 | // store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 * | ||||
353 | // store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 * | ||||
354 | // ... * tiles | ||||
355 | // ... pre-config shape of %td * | ||||
356 | // store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 * | ||||
357 | // store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 * | ||||
358 | // | ||||
359 | // call void @llvm.x86.ldtilecfg(i8* %cfgmem) * pre-config | ||||
360 | // -------------------------------------------------------------------------- | ||||
361 | // %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key | ||||
362 | // %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) | ||||
363 | // %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx | ||||
364 | // %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3) | ||||
365 | // call void @llvm.x86.tilestored64.internal(... td) area | ||||
366 | // -------------------------------------------------------------------------- | ||||
367 | bool X86PreAMXConfig::preTileConfig() { | ||||
368 | DenseMap<Instruction *, SmallVector<Value *, 8>> PosAndShapes; | ||||
369 | bool NeedCfg = findConfigShapes(PosAndShapes); | ||||
370 | if (!NeedCfg) | ||||
371 | return false; | ||||
372 | for (auto &IPAndShapes : PosAndShapes) | ||||
373 | addTileConfig(IPAndShapes.first, IPAndShapes.second); | ||||
374 | |||||
375 | return true; | ||||
376 | } | ||||
377 | } // anonymous namespace | ||||
378 | |||||
379 | namespace { | ||||
380 | |||||
381 | class X86PreAMXConfigPass : public FunctionPass { | ||||
382 | public: | ||||
383 | static char ID; | ||||
384 | |||||
385 | X86PreAMXConfigPass() : FunctionPass(ID) { | ||||
386 | initializeX86PreAMXConfigPassPass(*PassRegistry::getPassRegistry()); | ||||
387 | } | ||||
388 | |||||
389 | bool runOnFunction(Function &F) override { | ||||
390 | TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); | ||||
391 | bool C = false; | ||||
392 | |||||
393 | // Prepare for fast register allocation at O0. | ||||
394 | if (TM->getOptLevel() == CodeGenOpt::None) { | ||||
395 | |||||
396 | // We pre-config each key AMX intrinsic at O0. | ||||
397 | // In theory, one tile config can cover several AMX intrinsics, but | ||||
398 | // it is very difficult to classify the tile shapes at O0, so here we | ||||
399 | // keep things simple and pre-config every key AMX intrinsic. | ||||
400 | X86PreAMXConfig PCFG(F); | ||||
401 | C = PCFG.preTileConfig(); | ||||
402 | } | ||||
403 | |||||
404 | return C; | ||||
405 | } | ||||
406 | |||||
407 | void getAnalysisUsage(AnalysisUsage &AU) const override { | ||||
408 | AU.setPreservesCFG(); | ||||
409 | AU.addRequired<TargetPassConfig>(); | ||||
410 | } | ||||
411 | }; | ||||
412 | |||||
413 | } // anonymous namespace | ||||
414 | |||||
415 | static const char PassName[] = "Pre AMX Tile Config"; | ||||
416 | char X86PreAMXConfigPass::ID = 0; | ||||
417 | INITIALIZE_PASS_BEGIN(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false) | ||||
418 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) | ||||
419 | INITIALIZE_PASS_END(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false) | ||||
420 | |||||
421 | FunctionPass *llvm::createX86PreAMXConfigPass() { | ||||
422 | return new X86PreAMXConfigPass(); | ||||
423 | } |
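As a side note on the shape pre-configuration above: preWriteTileCfg() writes the ldtilecfg buffer with the palette byte at offset 0, the per-tile column widths as 16-bit values at offset 16 + 2*I, and the per-tile row counts as bytes at offset 48 + I. The following stand-alone sketch (example shape values only) fills a 64-byte buffer the same way for two tiles:

    #include <array>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      std::array<uint8_t, 64> Cfg{};      // zero-initialized, like the store of zeroinitializer
      Cfg[0] = 1;                         // palette = 1, as in preWriteTileCfg

      const uint8_t Rows[] = {16, 16};    // example shapes: (m, k) for t1 and (k, n) for t2
      const uint16_t Cols[] = {64, 64};
      for (int I = 0; I < 2; ++I) {
        Cfg[48 + I] = Rows[I];                                     // amx.tmm.<I>.shape.row
        std::memcpy(&Cfg[16 + 2 * I], &Cols[I], sizeof(uint16_t)); // amx.tmm.<I>.shape.col
      }

      std::printf("palette=%u row0=%u col0=%u\n", unsigned(Cfg[0]), unsigned(Cfg[48]),
                  unsigned(Cfg[16]) | (unsigned(Cfg[17]) << 8));
      return 0;
    }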
1 | //===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_ADT_ILIST_ITERATOR_H |
10 | #define LLVM_ADT_ILIST_ITERATOR_H |
11 | |
12 | #include "llvm/ADT/ilist_node.h" |
13 | #include <cassert> |
14 | #include <cstddef> |
15 | #include <iterator> |
16 | #include <type_traits> |
17 | |
18 | namespace llvm { |
19 | |
20 | namespace ilist_detail { |
21 | |
22 | /// Find const-correct node types. |
23 | template <class OptionsT, bool IsConst> struct IteratorTraits; |
24 | template <class OptionsT> struct IteratorTraits<OptionsT, false> { |
25 | using value_type = typename OptionsT::value_type; |
26 | using pointer = typename OptionsT::pointer; |
27 | using reference = typename OptionsT::reference; |
28 | using node_pointer = ilist_node_impl<OptionsT> *; |
29 | using node_reference = ilist_node_impl<OptionsT> &; |
30 | }; |
31 | template <class OptionsT> struct IteratorTraits<OptionsT, true> { |
32 | using value_type = const typename OptionsT::value_type; |
33 | using pointer = typename OptionsT::const_pointer; |
34 | using reference = typename OptionsT::const_reference; |
35 | using node_pointer = const ilist_node_impl<OptionsT> *; |
36 | using node_reference = const ilist_node_impl<OptionsT> &; |
37 | }; |
38 | |
39 | template <bool IsReverse> struct IteratorHelper; |
40 | template <> struct IteratorHelper<false> : ilist_detail::NodeAccess { |
41 | using Access = ilist_detail::NodeAccess; |
42 | |
43 | template <class T> static void increment(T *&I) { I = Access::getNext(*I); } |
44 | template <class T> static void decrement(T *&I) { I = Access::getPrev(*I); } |
45 | }; |
46 | template <> struct IteratorHelper<true> : ilist_detail::NodeAccess { |
47 | using Access = ilist_detail::NodeAccess; |
48 | |
49 | template <class T> static void increment(T *&I) { I = Access::getPrev(*I); } |
50 | template <class T> static void decrement(T *&I) { I = Access::getNext(*I); } |
51 | }; |
52 | |
53 | } // end namespace ilist_detail |
54 | |
55 | /// Iterator for intrusive lists based on ilist_node. |
56 | template <class OptionsT, bool IsReverse, bool IsConst> |
57 | class ilist_iterator : ilist_detail::SpecificNodeAccess<OptionsT> { |
58 | friend ilist_iterator<OptionsT, IsReverse, !IsConst>; |
59 | friend ilist_iterator<OptionsT, !IsReverse, IsConst>; |
60 | friend ilist_iterator<OptionsT, !IsReverse, !IsConst>; |
61 | |
62 | using Traits = ilist_detail::IteratorTraits<OptionsT, IsConst>; |
63 | using Access = ilist_detail::SpecificNodeAccess<OptionsT>; |
64 | |
65 | public: |
66 | using value_type = typename Traits::value_type; |
67 | using pointer = typename Traits::pointer; |
68 | using reference = typename Traits::reference; |
69 | using difference_type = ptrdiff_t; |
70 | using iterator_category = std::bidirectional_iterator_tag; |
71 | using const_pointer = typename OptionsT::const_pointer; |
72 | using const_reference = typename OptionsT::const_reference; |
73 | |
74 | private: |
75 | using node_pointer = typename Traits::node_pointer; |
76 | using node_reference = typename Traits::node_reference; |
77 | |
78 | node_pointer NodePtr = nullptr; |
79 | |
80 | public: |
81 | /// Create from an ilist_node. |
82 | explicit ilist_iterator(node_reference N) : NodePtr(&N) {} |
83 | |
84 | explicit ilist_iterator(pointer NP) : NodePtr(Access::getNodePtr(NP)) {} |
85 | explicit ilist_iterator(reference NR) : NodePtr(Access::getNodePtr(&NR)) {} |
86 | ilist_iterator() = default; |
87 | |
88 | // This is templated so that we can allow constructing a const iterator from |
89 | // a nonconst iterator... |
90 | template <bool RHSIsConst> |
91 | ilist_iterator(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS, |
92 | std::enable_if_t<IsConst || !RHSIsConst, void *> = nullptr) |
93 | : NodePtr(RHS.NodePtr) {} |
94 | |
95 | // This is templated so that we can allow assigning to a const iterator from |
96 | // a nonconst iterator... |
97 | template <bool RHSIsConst> |
98 | std::enable_if_t<IsConst || !RHSIsConst, ilist_iterator &> |
99 | operator=(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS) { |
100 | NodePtr = RHS.NodePtr; |
101 | return *this; |
102 | } |
103 | |
104 | /// Explicit conversion between forward/reverse iterators. |
105 | /// |
106 | /// Translate between forward and reverse iterators without changing range |
107 | /// boundaries. The resulting iterator will dereference (and have a handle) |
108 | /// to the previous node, which is somewhat unexpected; but converting the |
109 | /// two endpoints in a range will give the same range in reverse. |
110 | /// |
111 | /// This matches std::reverse_iterator conversions. |
112 | explicit ilist_iterator( |
113 | const ilist_iterator<OptionsT, !IsReverse, IsConst> &RHS) |
114 | : ilist_iterator(++RHS.getReverse()) {} |
115 | |
116 | /// Get a reverse iterator to the same node. |
117 | /// |
118 | /// Gives a reverse iterator that will dereference (and have a handle) to the |
119 | /// same node. Converting the endpoint iterators in a range will give a |
120 | /// different range; for range operations, use the explicit conversions. |
121 | ilist_iterator<OptionsT, !IsReverse, IsConst> getReverse() const { |
122 | if (NodePtr) |
123 | return ilist_iterator<OptionsT, !IsReverse, IsConst>(*NodePtr); |
124 | return ilist_iterator<OptionsT, !IsReverse, IsConst>(); |
125 | } |
126 | |
127 | /// Const-cast. |
128 | ilist_iterator<OptionsT, IsReverse, false> getNonConst() const { |
129 | if (NodePtr) |
130 | return ilist_iterator<OptionsT, IsReverse, false>( |
131 | const_cast<typename ilist_iterator<OptionsT, IsReverse, |
132 | false>::node_reference>(*NodePtr)); |
133 | return ilist_iterator<OptionsT, IsReverse, false>(); |
134 | } |
135 | |
136 | // Accessors... |
137 | reference operator*() const { |
138 | assert(!NodePtr->isKnownSentinel());
139 | return *Access::getValuePtr(NodePtr); |
140 | } |
141 | pointer operator->() const { return &operator*(); } |
142 | |
143 | // Comparison operators |
144 | friend bool operator==(const ilist_iterator &LHS, const ilist_iterator &RHS) { |
145 | return LHS.NodePtr == RHS.NodePtr; |
146 | } |
147 | friend bool operator!=(const ilist_iterator &LHS, const ilist_iterator &RHS) { |
148 | return LHS.NodePtr != RHS.NodePtr; |
149 | } |
150 | |
151 | // Increment and decrement operators... |
152 | ilist_iterator &operator--() { |
153 | NodePtr = IsReverse ? NodePtr->getNext() : NodePtr->getPrev(); |
154 | return *this; |
155 | } |
156 | ilist_iterator &operator++() { |
157 | NodePtr = IsReverse ? NodePtr->getPrev() : NodePtr->getNext(); |
158 | return *this; |
159 | } |
160 | ilist_iterator operator--(int) { |
161 | ilist_iterator tmp = *this; |
162 | --*this; |
163 | return tmp; |
164 | } |
165 | ilist_iterator operator++(int) { |
166 | ilist_iterator tmp = *this; |
167 | ++*this; |
168 | return tmp; |
169 | } |
170 | |
171 | /// Get the underlying ilist_node. |
172 | node_pointer getNodePtr() const { return static_cast<node_pointer>(NodePtr); } |
173 | |
174 | /// Check for end. Only valid if ilist_sentinel_tracking<true>. |
175 | bool isEnd() const { return NodePtr ? NodePtr->isSentinel() : false; } |
176 | }; |
177 | |
178 | template <typename From> struct simplify_type; |
179 | |
180 | /// Allow ilist_iterators to convert into pointers to a node automatically when |
181 | /// used by the dyn_cast, cast, isa mechanisms... |
182 | /// |
183 | /// FIXME: remove this, since there is no implicit conversion to NodeTy. |
184 | template <class OptionsT, bool IsConst> |
185 | struct simplify_type<ilist_iterator<OptionsT, false, IsConst>> { |
186 | using iterator = ilist_iterator<OptionsT, false, IsConst>; |
187 | using SimpleType = typename iterator::pointer; |
188 | |
189 | static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; } |
190 | }; |
191 | template <class OptionsT, bool IsConst> |
192 | struct simplify_type<const ilist_iterator<OptionsT, false, IsConst>> |
193 | : simplify_type<ilist_iterator<OptionsT, false, IsConst>> {}; |
194 | |
195 | } // end namespace llvm |
196 | |
197 | #endif // LLVM_ADT_ILIST_ITERATOR_H |
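This header is where the diagnosed path dereferences the end iterator: operator*() (line 138 above) only guards against the sentinel with an assert, so in a build where asserts are compiled out, &*PosEnd in getShapesAndConfigPosEnd() quietly returns the block's sentinel node. A simplified, self-contained sketch of that mechanism (stand-in types, not llvm::ilist itself):

    #include <cassert>
    #include <cstdio>

    struct Node {
      Node *Prev = this, *Next = this;   // circular list: the sentinel links to itself when empty
      bool IsSentinel = false;
    };

    struct Iterator {
      Node *NodePtr;
      Node &operator*() const {
        assert(!NodePtr->IsSentinel && "dereferencing end()");  // no-op under NDEBUG
        return *NodePtr;
      }
    };

    struct List {
      Node Sentinel;
      List() { Sentinel.IsSentinel = true; }
      Iterator begin() { return {Sentinel.Next}; }
      Iterator end() { return {&Sentinel}; }
    };

    int main() {
      List L;                // empty list: begin() == end()
      Node &N = *L.end();    // build with -DNDEBUG to see the silent path; otherwise the assert aborts here
      std::printf("sentinel? %d\n", N.IsSentinel);
      return 0;
    }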