Bug Summary

File:src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PreAMXConfig.cpp
Warning:line 239, column 28
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86PreAMXConfig.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup 
-fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PreAMXConfig.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PreAMXConfig.cpp

1//===- Target/X86/X86PreAMXConfig.cpp - ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Insert tilecfg for each area of key AMX intrinsic.
10/// All the key AMX intrinsic's tile operand must come from tileload. And the
11/// def tile of key AMX intrinsic must be tilestored.
12/// take tdpbssd for example:
13/// --------------------------------------------------------------------------
14/// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(...) key
15/// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(...) |
16/// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(...) amx
17/// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(t1, t2, t3) |
18/// call void @llvm.x86.tilestored64.internal(... td) area
19/// --------------------------------------------------------------------------
20/// This pass will insert tilecfg before every key-amx-area, some like:
21/// --------------------------------------------------------------------------
22/// %cfgmem = alloca <16 x i32>, align 4 * allocate mem
23/// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init
24/// ...
25/// ... pre-config shape of %t1 *
26/// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
27/// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
28/// ... *
29/// ... pre-config shape of %t2 * shapes
30/// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 *
31/// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
32/// ...
33/// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * tile config
34//
35//===----------------------------------------------------------------------===//
36//
37#include "X86.h"
38#include "llvm/ADT/SmallSet.h"
39#include "llvm/Analysis/TargetTransformInfo.h"
40#include "llvm/CodeGen/Passes.h"
41#include "llvm/CodeGen/TargetPassConfig.h"
42#include "llvm/CodeGen/ValueTypes.h"
43#include "llvm/IR/DataLayout.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/IRBuilder.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/IntrinsicInst.h"
48#include "llvm/IR/IntrinsicsX86.h"
49#include "llvm/IR/PatternMatch.h"
50#include "llvm/InitializePasses.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/raw_ostream.h"
53#include "llvm/Target/TargetMachine.h"
54
55using namespace llvm;
56using namespace PatternMatch;
57
58#define DEBUG_TYPE"pre-amx-config" "pre-amx-config"
59
60static bool isAMXIntrinsic(IntrinsicInst *II) {
61 for (Value *Operand : II->operands())
62 if (Operand->getType()->isX86_AMXTy())
63 return true;
64 return II->getType()->isX86_AMXTy();
65}
66
67static bool isTileLoad(IntrinsicInst *II) {
68 return II->getIntrinsicID() == Intrinsic::x86_tileloadd64_internal ||
69 II->getIntrinsicID() == Intrinsic::x86_tileloaddt164_internal;
70}
71
72static bool isTileStore(IntrinsicInst *II) {
73 return II->getIntrinsicID() == Intrinsic::x86_tilestored64_internal;
74}
75
76#ifndef NDEBUG1
77static bool onlyTileDef(IntrinsicInst *II) {
78 for (Value *Operand : II->operands())
79 if (Operand->getType()->isX86_AMXTy())
80 return false;
81 return II->getType()->isX86_AMXTy();
82}
83
84static bool brokenVolatile(Instruction *I) {
85 // Todo: it is weak to identify a normal call here.
86 if ((isa<CallInst>(I) && !isa<IntrinsicInst>(I)) || I->isTerminator())
87 return true;
88 return false;
89}
90#endif
91
92namespace {
93class X86PreAMXConfig {
94 Function &F;
95
96public:
97 X86PreAMXConfig(Function &Func) : F(Func) {}
98 bool preTileConfig();
99 bool addTileConfig(Instruction *ModelStart, SmallVector<Value *, 8> &Shapes);
100 bool findConfigShapes(
101 DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes);
102 bool getKeyAMXShapes(IntrinsicInst *KeyAMX, SmallVector<Value *, 8> &Shapes);
103 bool preWriteTileCfg(Value *I8Ptr, Instruction *Pos,
104 SmallVector<Value *, 8> &Shapes);
105 BasicBlock::iterator
106 getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
107 SmallVector<Value *, 8> &Shapes);
108 bool checkVolatileModel(SmallSet<Value *, 4> &Loads, IntrinsicInst *Store,
109 IntrinsicInst *KeyAMX);
110};
111
112// Write the shapes into tilecfg's memory in order. This may not be right,
113// because the first shape may not correspond to the first tmm register,
114// so we need to handle it at X86FastTileConfig::materializeTileCfg()
115// after register allocation.
116// For example:
117// --------------------------------------------------------------------------
118// zeroinitialize tilecfg's mem (of ldtilecfg)
119// --------------------------------------------------------------------------
120// ... pre-config shape of %t1 *
121// %amx.tmm.0.shape.row = getelementptr i8, i8* %mem, i64 48 *
122// %amx.tmm.0.shape.col = getelementptr i16, i16* %mem, i64 16 *
123// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
124// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
125// ... *
126// ... pre-config shape of %t2 *
127// %amx.tmm.1.shape.row = getelementptr i8, i8* %mem, i64 49 *
128// %amx.tmm.1.shape.col = getelementptr i16, i16* %mem, i64 18 *
129// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes
130// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
131// ... *
132// ... pre-config shape of %t3 * of
133// %amx.tmm.2.shape.row = getelementptr i8, i8* %mem, i64 50 *
134// %amx.tmm.2.shape.col = getelementptr i16, i16* %mem, i64 20 *
135// store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 *
136// store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 *
137// ... * tiles
138// ... pre-config shape of %td *
139// %amx.tmm.3.shape.row = getelementptr i8, i8* %mem, i64 51 *
140// %amx.tmm.3.shape.col = getelementptr i16, i16* %mem, i64 22 *
141// store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 *
142// store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 *
143// --------------------------------------------------------------------------
144// call void @llvm.x86.ldtilecfg(i8* %mem) * tile config
145// --------------------------------------------------------------------------
146// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key
147// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...)
148// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx
149// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3)
150// call void @llvm.x86.tilestored64.internal(... td) area
151// --------------------------------------------------------------------------
152bool X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, Instruction *Pos,
153 SmallVector<Value *, 8> &Shapes) {
154 bool Write = false;
155 LLVMContext &Ctx = Pos->getParent()->getContext();
156 Type *I8Ty = Type::getInt8Ty(Ctx);
157 Type *I16Ty = Type::getInt16Ty(Ctx);
158
159 // TODO: Currently we defaultly set Palette = 1, it may be assigned to
160 // other value in the future.
161 Value *PaletteOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
162 Value *PaletteValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
163 Value *PalettePos =
164 GetElementPtrInst::Create(I8Ty, I8Ptr, PaletteOffset, "", Pos);
165 new StoreInst(PaletteValue, PalettePos, Pos);
166
167 for (int I = 0, E = Shapes.size() / 2; I < E; I++) {
168 Value *RowOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 48 + I);
169 Value *ColOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 16 + I * 2);
170 const std::string ShapeName = "amx.tmm." + itostr(I);
171 Value *RowPos = GetElementPtrInst::Create(I8Ty, I8Ptr, RowOffset,
172 ShapeName + ".shape.row", Pos);
173 Value *ColPos = GetElementPtrInst::Create(I8Ty, I8Ptr, ColOffset, "", Pos);
174 ColPos = new BitCastInst(ColPos, PointerType::get(I16Ty, 0),
175 ShapeName + ".shape.col", Pos);
176 Value *Row = Shapes[I * 2];
177 Value *Col = Shapes[I * 2 + 1];
178 Row = new TruncInst(Row, I8Ty, "", Pos);
179 new StoreInst(Row, RowPos, Pos);
180 new StoreInst(Col, ColPos, Pos);
181 Write = true;
182 }
183 return Write;
184}
185
186bool X86PreAMXConfig::addTileConfig(Instruction *ModelStart,
187 SmallVector<Value *, 8> &Shapes) {
188 Module *M = F.getParent();
189 IRBuilder<> Builder(ModelStart);
190 const DataLayout &DL = M->getDataLayout();
191 unsigned AddrSpace = DL.getAllocaAddrSpace();
192 LLVMContext &Ctx = Builder.getContext();
193 Type *V512Ty = VectorType::get(Builder.getInt32Ty(), 16, false);
194 Align Alignment = DL.getPrefTypeAlign(Type::getInt32Ty(Ctx));
195
196 AllocaInst *Addr =
197 new AllocaInst(V512Ty, AddrSpace, "", &F.getEntryBlock().front());
198 Addr->setAlignment(Alignment);
199 Value *I8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy());
200
201 std::array<Value *, 1> Args = {I8Ptr};
202 Instruction *Cfg =
203 Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, Args);
204
205 Value *Val0 = Constant::getNullValue(V512Ty);
206 Instruction *Init0 = new StoreInst(Val0, Addr, false, Alignment, Cfg);
207 assert(Init0 && "Not Zero initilizate the cfg mem!")((void)0);
208
209 preWriteTileCfg(I8Ptr, Cfg, Shapes);
210
211 return Init0;
212}
213
214// Todo: We may need to handle "more than one store" case in the future.
215bool X86PreAMXConfig::checkVolatileModel(SmallSet<Value *, 4> &Loads,
216 IntrinsicInst *Store,
217 IntrinsicInst *KeyAMX) {
218 Value *ST = Store->getOperand(4);
219
220 // Only has tileload and tilestore.
221 if (!KeyAMX)
222 return (Loads.size() == 1) && Loads.contains(ST);
223
224 // All Loads should be operands of KeyAMX.
225 // All tile operands of KeyAMX should come from Loads.
226 for (Value *Op : KeyAMX->operands()) {
227 if (Op->getType()->isX86_AMXTy())
228 if (!Loads.erase(Op))
229 return false;
230 }
231
232 // The def of KeyAMX should be stored into mem.
233 // Todo: is it key amx can be no def?
234 return Loads.empty() && (ST == cast<Value>(KeyAMX));
235}
236
237bool X86PreAMXConfig::getKeyAMXShapes(IntrinsicInst *KeyAMX,
238 SmallVector<Value *, 8> &Shapes) {
239 for (unsigned I = 0; I < KeyAMX->getNumOperands(); I++) {
20
Called C++ object pointer is null
240 Value *Op = KeyAMX->getOperand(I);
241 if (!Op->getType()->isX86_AMXTy())
242 continue;
243 IntrinsicInst *TileDef = dyn_cast<IntrinsicInst>(Op);
244 assert((TileDef && isTileLoad(TileDef)) &&((void)0)
245 "All KeyAMX's tile definiation should comes from TileLoad!")((void)0);
246 Shapes.push_back(TileDef->getOperand(0));
247 Shapes.push_back(TileDef->getOperand(1));
248 }
249 if (!isTileStore(KeyAMX)) {
250 Shapes.push_back(KeyAMX->getOperand(0));
251 Shapes.push_back(KeyAMX->getOperand(1));
252 }
253 return Shapes.size() != 0;
254}
255
256// Collect the shapes and skip the area of current key amx intrinsic.
257//
258// For example:
259// ...
260// --------------------------------------------------------------------------
261// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) record (m,k)
262// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) record (k,n)
263// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) record (m,n)
264// %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) record (m,n)
265// call void @llvm.x86.tilestored64.internal(m, n,... td) <--PosEnd
266// --------------------------------------------------------------------------
267BasicBlock::iterator
268X86PreAMXConfig::getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
269 SmallVector<Value *, 8> &Shapes) {
270 IntrinsicInst *KeyAMX = nullptr;
271 BasicBlock *BB = Iter->getParent();
272 BasicBlock::iterator PosEnd = BB->end();
273 SmallSet<Value *, 4> Loads;
274
275 // See TileStore as "Config Position End" and check volatile model.
276 for (auto I = Iter, E = BB->end(); I != E; ++I) {
10
Calling 'operator!='
13
Returning from 'operator!='
14
Loop condition is false. Execution continues on line 294
277 assert(!brokenVolatile(&*I) && "Not reach tile store!")((void)0);
278 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I);
279 if (!II || !isAMXIntrinsic(II))
280 continue;
281
282 if (isTileLoad(II)) {
283 Loads.insert(II);
284 } else if (isTileStore(II)) {
285 if (!checkVolatileModel(Loads, II, KeyAMX))
286 report_fatal_error("Not Volatile AMX Model!");
287 PosEnd = I;
288 break;
289 } else {
290 assert(!KeyAMX && "Too many key amx intrinsic!")((void)0);
291 KeyAMX = II;
292 }
293 }
294 assert(PosEnd != BB->end() && "Not find TileStore!")((void)0);
295
296 // See KeyAMX as TileStore if only TileLoad and TileStore.
297 if (!KeyAMX
14.1
'KeyAMX' is null
14.1
'KeyAMX' is null
)
15
Taking true branch
298 KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd);
16
Assuming the object is not a 'IntrinsicInst'
17
Null pointer value stored to 'KeyAMX'
299
300 // Get Shapes in order.
301 assert(Shapes.empty() && "Shapes should be clean.")((void)0);
302 getKeyAMXShapes(KeyAMX, Shapes);
18
Passing null pointer value via 1st parameter 'KeyAMX'
19
Calling 'X86PreAMXConfig::getKeyAMXShapes'
303
304 return PosEnd;
305}
306
307// Record a key amx area's shapes with its position.
308// Use the first tileload as its position.
309// For example:
310// ...
311// --------------------------------------------------------------------------
312// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) <-- pos
313// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) /
314// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) shapes:
315// %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) (m,k)(k,n)
316// call void @llvm.x86.tilestored64.internal(m, n,... td) (m,n)(m,n)
317// --------------------------------------------------------------------------
318bool X86PreAMXConfig::findConfigShapes(
319 DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes) {
320 bool Find = false;
321 for (BasicBlock &BB : F) {
322 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
5
Loop condition is true. Entering loop body
323 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I);
6
Assuming the object is a 'IntrinsicInst'
324 if (!II
6.1
'II' is non-null
6.1
'II' is non-null
)
7
Taking false branch
325 continue;
326 if (!isAMXIntrinsic(II))
8
Taking false branch
327 continue;
328 assert(onlyTileDef(II) && "Not volatile model for AMX at O0!")((void)0);
329
330 I = getShapesAndConfigPosEnd(I, PosAndShapes[&*I]);
9
Calling 'X86PreAMXConfig::getShapesAndConfigPosEnd'
331 Find = true;
332 }
333 }
334 return Find;
335}
336
337// Insert ldtilecfg and preconfig the shapes for each area of key AMX intrinsic.
338// e.g. (key amx = tdpbssd)
339// --------------------------------------------------------------------------
340// %cfgmem = alloca <16 x i32>, align 4 * allocate mem
341// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init
342// ...
343// ... pre-config shape of %t1 *
344// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
345// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
346// ... *
347// ... pre-config shape of %t2 *
348// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes
349// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
350// ... *
351// ... pre-config shape of %t3 * of
352// store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 *
353// store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 *
354// ... * tiles
355// ... pre-config shape of %td *
356// store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 *
357// store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 *
358//
359// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * pre-config
360// --------------------------------------------------------------------------
361// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key
362// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...)
363// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx
364// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3)
365// call void @llvm.x86.tilestored64.internal(... td) area
366// --------------------------------------------------------------------------
367bool X86PreAMXConfig::preTileConfig() {
368 DenseMap<Instruction *, SmallVector<Value *, 8>> PosAndShapes;
369 bool NeedCfg = findConfigShapes(PosAndShapes);
4
Calling 'X86PreAMXConfig::findConfigShapes'
370 if (!NeedCfg)
371 return false;
372 for (auto &IPAndShapes : PosAndShapes)
373 addTileConfig(IPAndShapes.first, IPAndShapes.second);
374
375 return true;
376}
377} // anonymous namespace
378
379namespace {
380
381class X86PreAMXConfigPass : public FunctionPass {
382public:
383 static char ID;
384
385 X86PreAMXConfigPass() : FunctionPass(ID) {
386 initializeX86PreAMXConfigPassPass(*PassRegistry::getPassRegistry());
387 }
388
389 bool runOnFunction(Function &F) override {
390 TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
391 bool C = false;
392
393 // Prepare for fast register allocation at O0.
394 if (TM->getOptLevel() == CodeGenOpt::None) {
1
Assuming the condition is true
2
Taking true branch
395
396 // We pre-config each key AMX intrinsic at O0.
397 // In theory, one tile config can cover several AMX intrinsics, but
398 // it is very diffcult to classify the tile shapes at O0. So here we
399 // let thing be easy, pre-config every key AMX intrinsic.
400 X86PreAMXConfig PCFG(F);
401 C = PCFG.preTileConfig();
3
Calling 'X86PreAMXConfig::preTileConfig'
402 }
403
404 return C;
405 }
406
407 void getAnalysisUsage(AnalysisUsage &AU) const override {
408 AU.setPreservesCFG();
409 AU.addRequired<TargetPassConfig>();
410 }
411};
412
413} // anonymous namespace
414
// Pass name, pass ID storage and pass registration.
// NOTE: in this analyzer listing each INITIALIZE_PASS_* invocation is followed
// on the same line by the text of its macro expansion, which continues over
// the unnumbered lines below. The expansion shown creates a PassInfo for
// X86PreAMXConfigPass, registers it with the PassRegistry, and defines
// llvm::initializeX86PreAMXConfigPassPass() on top of llvm::call_once.
415static const char PassName[] = "Pre AMX Tile Config";
416char X86PreAMXConfigPass::ID = 0;
417INITIALIZE_PASS_BEGIN(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)static void *initializeX86PreAMXConfigPassPassOnce(PassRegistry
&Registry) {
418INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)initializeTargetPassConfigPass(Registry);
419INITIALIZE_PASS_END(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)PassInfo *PI = new PassInfo( PassName, "pre-amx-config", &
X86PreAMXConfigPass::ID, PassInfo::NormalCtor_t(callDefaultCtor
<X86PreAMXConfigPass>), false, false); Registry.registerPass
(*PI, true); return PI; } static llvm::once_flag InitializeX86PreAMXConfigPassPassFlag
; void llvm::initializeX86PreAMXConfigPassPass(PassRegistry &
Registry) { llvm::call_once(InitializeX86PreAMXConfigPassPassFlag
, initializeX86PreAMXConfigPassPassOnce, std::ref(Registry));
}
420
421FunctionPass *llvm::createX86PreAMXConfigPass() {
422 return new X86PreAMXConfigPass();
423}

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT/ilist_iterator.h

1//===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_ILIST_ITERATOR_H
10#define LLVM_ADT_ILIST_ITERATOR_H
11
12#include "llvm/ADT/ilist_node.h"
13#include <cassert>
14#include <cstddef>
15#include <iterator>
16#include <type_traits>
17
18namespace llvm {
19
20namespace ilist_detail {
21
22/// Find const-correct node types.
23template <class OptionsT, bool IsConst> struct IteratorTraits;
24template <class OptionsT> struct IteratorTraits<OptionsT, false> {
25 using value_type = typename OptionsT::value_type;
26 using pointer = typename OptionsT::pointer;
27 using reference = typename OptionsT::reference;
28 using node_pointer = ilist_node_impl<OptionsT> *;
29 using node_reference = ilist_node_impl<OptionsT> &;
30};
31template <class OptionsT> struct IteratorTraits<OptionsT, true> {
32 using value_type = const typename OptionsT::value_type;
33 using pointer = typename OptionsT::const_pointer;
34 using reference = typename OptionsT::const_reference;
35 using node_pointer = const ilist_node_impl<OptionsT> *;
36 using node_reference = const ilist_node_impl<OptionsT> &;
37};
38
39template <bool IsReverse> struct IteratorHelper;
40template <> struct IteratorHelper<false> : ilist_detail::NodeAccess {
41 using Access = ilist_detail::NodeAccess;
42
43 template <class T> static void increment(T *&I) { I = Access::getNext(*I); }
44 template <class T> static void decrement(T *&I) { I = Access::getPrev(*I); }
45};
46template <> struct IteratorHelper<true> : ilist_detail::NodeAccess {
47 using Access = ilist_detail::NodeAccess;
48
49 template <class T> static void increment(T *&I) { I = Access::getPrev(*I); }
50 template <class T> static void decrement(T *&I) { I = Access::getNext(*I); }
51};
52
53} // end namespace ilist_detail
54
55/// Iterator for intrusive lists based on ilist_node.
56template <class OptionsT, bool IsReverse, bool IsConst>
57class ilist_iterator : ilist_detail::SpecificNodeAccess<OptionsT> {
58 friend ilist_iterator<OptionsT, IsReverse, !IsConst>;
59 friend ilist_iterator<OptionsT, !IsReverse, IsConst>;
60 friend ilist_iterator<OptionsT, !IsReverse, !IsConst>;
61
62 using Traits = ilist_detail::IteratorTraits<OptionsT, IsConst>;
63 using Access = ilist_detail::SpecificNodeAccess<OptionsT>;
64
65public:
66 using value_type = typename Traits::value_type;
67 using pointer = typename Traits::pointer;
68 using reference = typename Traits::reference;
69 using difference_type = ptrdiff_t;
70 using iterator_category = std::bidirectional_iterator_tag;
71 using const_pointer = typename OptionsT::const_pointer;
72 using const_reference = typename OptionsT::const_reference;
73
74private:
75 using node_pointer = typename Traits::node_pointer;
76 using node_reference = typename Traits::node_reference;
77
78 node_pointer NodePtr = nullptr;
79
80public:
81 /// Create from an ilist_node.
82 explicit ilist_iterator(node_reference N) : NodePtr(&N) {}
83
84 explicit ilist_iterator(pointer NP) : NodePtr(Access::getNodePtr(NP)) {}
85 explicit ilist_iterator(reference NR) : NodePtr(Access::getNodePtr(&NR)) {}
86 ilist_iterator() = default;
87
88 // This is templated so that we can allow constructing a const iterator from
89 // a nonconst iterator...
90 template <bool RHSIsConst>
91 ilist_iterator(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS,
92 std::enable_if_t<IsConst || !RHSIsConst, void *> = nullptr)
93 : NodePtr(RHS.NodePtr) {}
94
95 // This is templated so that we can allow assigning to a const iterator from
96 // a nonconst iterator...
97 template <bool RHSIsConst>
98 std::enable_if_t<IsConst || !RHSIsConst, ilist_iterator &>
99 operator=(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS) {
100 NodePtr = RHS.NodePtr;
101 return *this;
102 }
103
104 /// Explicit conversion between forward/reverse iterators.
105 ///
106 /// Translate between forward and reverse iterators without changing range
107 /// boundaries. The resulting iterator will dereference (and have a handle)
108 /// to the previous node, which is somewhat unexpected; but converting the
109 /// two endpoints in a range will give the same range in reverse.
110 ///
111 /// This matches std::reverse_iterator conversions.
112 explicit ilist_iterator(
113 const ilist_iterator<OptionsT, !IsReverse, IsConst> &RHS)
114 : ilist_iterator(++RHS.getReverse()) {}
115
116 /// Get a reverse iterator to the same node.
117 ///
118 /// Gives a reverse iterator that will dereference (and have a handle) to the
119 /// same node. Converting the endpoint iterators in a range will give a
120 /// different range; for range operations, use the explicit conversions.
121 ilist_iterator<OptionsT, !IsReverse, IsConst> getReverse() const {
122 if (NodePtr)
123 return ilist_iterator<OptionsT, !IsReverse, IsConst>(*NodePtr);
124 return ilist_iterator<OptionsT, !IsReverse, IsConst>();
125 }
126
127 /// Const-cast.
128 ilist_iterator<OptionsT, IsReverse, false> getNonConst() const {
129 if (NodePtr)
130 return ilist_iterator<OptionsT, IsReverse, false>(
131 const_cast<typename ilist_iterator<OptionsT, IsReverse,
132 false>::node_reference>(*NodePtr));
133 return ilist_iterator<OptionsT, IsReverse, false>();
134 }
135
136 // Accessors...
137 reference operator*() const {
138 assert(!NodePtr->isKnownSentinel())((void)0);
139 return *Access::getValuePtr(NodePtr);
140 }
141 pointer operator->() const { return &operator*(); }
142
143 // Comparison operators
144 friend bool operator==(const ilist_iterator &LHS, const ilist_iterator &RHS) {
145 return LHS.NodePtr == RHS.NodePtr;
146 }
147 friend bool operator!=(const ilist_iterator &LHS, const ilist_iterator &RHS) {
148 return LHS.NodePtr != RHS.NodePtr;
11
Assuming 'LHS.NodePtr' is equal to 'RHS.NodePtr'
12
Returning zero, which participates in a condition later
149 }
150
151 // Increment and decrement operators...
152 ilist_iterator &operator--() {
153 NodePtr = IsReverse ? NodePtr->getNext() : NodePtr->getPrev();
154 return *this;
155 }
156 ilist_iterator &operator++() {
157 NodePtr = IsReverse ? NodePtr->getPrev() : NodePtr->getNext();
158 return *this;
159 }
160 ilist_iterator operator--(int) {
161 ilist_iterator tmp = *this;
162 --*this;
163 return tmp;
164 }
165 ilist_iterator operator++(int) {
166 ilist_iterator tmp = *this;
167 ++*this;
168 return tmp;
169 }
170
171 /// Get the underlying ilist_node.
172 node_pointer getNodePtr() const { return static_cast<node_pointer>(NodePtr); }
173
174 /// Check for end. Only valid if ilist_sentinel_tracking<true>.
175 bool isEnd() const { return NodePtr ? NodePtr->isSentinel() : false; }
176};
177
178template <typename From> struct simplify_type;
179
180/// Allow ilist_iterators to convert into pointers to a node automatically when
181/// used by the dyn_cast, cast, isa mechanisms...
182///
183/// FIXME: remove this, since there is no implicit conversion to NodeTy.
184template <class OptionsT, bool IsConst>
185struct simplify_type<ilist_iterator<OptionsT, false, IsConst>> {
186 using iterator = ilist_iterator<OptionsT, false, IsConst>;
187 using SimpleType = typename iterator::pointer;
188
189 static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; }
190};
191template <class OptionsT, bool IsConst>
192struct simplify_type<const ilist_iterator<OptionsT, false, IsConst>>
193 : simplify_type<ilist_iterator<OptionsT, false, IsConst>> {};
194
195} // end namespace llvm
196
197#endif // LLVM_ADT_ILIST_ITERATOR_H