clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIInstrInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | #include "SIInstrInfo.h" |
15 | #include "AMDGPU.h" |
16 | #include "AMDGPUInstrInfo.h" |
17 | #include "GCNHazardRecognizer.h" |
18 | #include "GCNSubtarget.h" |
19 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
20 | #include "SIMachineFunctionInfo.h" |
21 | #include "llvm/Analysis/ValueTracking.h" |
22 | #include "llvm/CodeGen/LiveVariables.h" |
23 | #include "llvm/CodeGen/MachineDominators.h" |
24 | #include "llvm/CodeGen/RegisterScavenging.h" |
25 | #include "llvm/CodeGen/ScheduleDAG.h" |
26 | #include "llvm/IR/DiagnosticInfo.h" |
27 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
28 | #include "llvm/MC/MCContext.h" |
29 | #include "llvm/Support/CommandLine.h" |
30 | #include "llvm/Target/TargetMachine.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | #define DEBUG_TYPE "si-instr-info" |
35 | |
36 | #define GET_INSTRINFO_CTOR_DTOR |
37 | #include "AMDGPUGenInstrInfo.inc" |
38 | |
39 | namespace llvm { |
40 | |
41 | class AAResults; |
42 | |
43 | namespace AMDGPU { |
44 | #define GET_D16ImageDimIntrinsics_IMPL |
45 | #define GET_ImageDimIntrinsicTable_IMPL |
46 | #define GET_RsrcIntrinsics_IMPL |
47 | #include "AMDGPUGenSearchableTables.inc" |
48 | } |
49 | } |
50 | |
51 | |
52 | |
53 | |
54 | |
55 | static cl::opt<unsigned> |
56 | BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), |
57 | cl::desc("Restrict range of branch instructions (DEBUG)")); |
58 | |
59 | static cl::opt<bool> Fix16BitCopies( |
60 | "amdgpu-fix-16-bit-physreg-copies", |
61 | cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), |
62 | cl::init(true), |
63 | cl::ReallyHidden); |
64 | |
65 | SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) |
66 | : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), |
67 | RI(ST), ST(ST) { |
68 | SchedModel.init(&ST); |
69 | } |
70 | |
71 | |
72 | |
73 | |
74 | |
75 | static unsigned getNumOperandsNoGlue(SDNode *Node) { |
76 | unsigned N = Node->getNumOperands(); |
77 | while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) |
78 | --N; |
79 | return N; |
80 | } |
81 | |
82 | |
83 | |
84 | static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { |
85 | unsigned Opc0 = N0->getMachineOpcode(); |
86 | unsigned Opc1 = N1->getMachineOpcode(); |
87 | |
88 | int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); |
89 | int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); |
90 | |
91 | if (Op0Idx == -1 && Op1Idx == -1) |
92 | return true; |
93 | |
94 | |
95 | if ((Op0Idx == -1 && Op1Idx != -1) || |
96 | (Op1Idx == -1 && Op0Idx != -1)) |
97 | return false; |
98 | |
99 | |
100 | |
101 | |
102 | |
103 | --Op0Idx; |
104 | --Op1Idx; |
105 | |
106 | return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); |
107 | } |
108 | |
109 | bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, |
110 | AAResults *AA) const { |
111 | if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) { |
112 | |
113 | |
114 | |
115 | |
116 | |
117 | |
118 | |
119 | return !MI.hasImplicitDef() && |
120 | MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() && |
121 | !MI.mayRaiseFPException(); |
122 | } |
123 | |
124 | return false; |
125 | } |
126 | |
127 | bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const { |
128 | |
129 | return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() && |
130 | isVALU(*MO.getParent()); |
131 | } |
132 | |
133 | bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, |
134 | int64_t &Offset0, |
135 | int64_t &Offset1) const { |
136 | if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) |
137 | return false; |
138 | |
139 | unsigned Opc0 = Load0->getMachineOpcode(); |
140 | unsigned Opc1 = Load1->getMachineOpcode(); |
141 | |
142 | |
143 | if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) |
144 | return false; |
145 | |
146 | if (isDS(Opc0) && isDS(Opc1)) { |
147 | |
148 | |
149 | if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) |
150 | return false; |
151 | |
152 | |
153 | if (Load0->getOperand(0) != Load1->getOperand(0)) |
154 | return false; |
155 | |
156 | |
157 | |
158 | |
159 | int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); |
160 | int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); |
161 | if (Offset0Idx == -1 || Offset1Idx == -1) |
162 | return false; |
163 | |
164 | |
165 | |
166 | |
167 | |
168 | Offset0Idx -= get(Opc0).NumDefs; |
169 | Offset1Idx -= get(Opc1).NumDefs; |
170 | Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue(); |
171 | Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue(); |
172 | return true; |
173 | } |
174 | |
175 | if (isSMRD(Opc0) && isSMRD(Opc1)) { |
176 | |
177 | if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || |
178 | AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) |
179 | return false; |
180 | |
181 | assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); |
182 | |
183 | |
184 | if (Load0->getOperand(0) != Load1->getOperand(0)) |
185 | return false; |
186 | |
187 | const ConstantSDNode *Load0Offset = |
188 | dyn_cast<ConstantSDNode>(Load0->getOperand(1)); |
189 | const ConstantSDNode *Load1Offset = |
190 | dyn_cast<ConstantSDNode>(Load1->getOperand(1)); |
191 | |
192 | if (!Load0Offset || !Load1Offset) |
193 | return false; |
194 | |
195 | Offset0 = Load0Offset->getZExtValue(); |
196 | Offset1 = Load1Offset->getZExtValue(); |
197 | return true; |
198 | } |
199 | |
200 | |
201 | if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { |
202 | |
203 | |
204 | if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || |
205 | !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || |
206 | !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) |
207 | return false; |
208 | |
209 | int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); |
210 | int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); |
211 | |
212 | if (OffIdx0 == -1 || OffIdx1 == -1) |
213 | return false; |
214 | |
215 | |
216 | |
217 | |
218 | OffIdx0 -= get(Opc0).NumDefs; |
219 | OffIdx1 -= get(Opc1).NumDefs; |
220 | |
221 | SDValue Off0 = Load0->getOperand(OffIdx0); |
222 | SDValue Off1 = Load1->getOperand(OffIdx1); |
223 | |
224 | |
225 | if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) |
226 | return false; |
227 | |
228 | Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); |
229 | Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); |
230 | return true; |
231 | } |
232 | |
233 | return false; |
234 | } |
235 | |
236 | static bool isStride64(unsigned Opc) { |
237 | switch (Opc) { |
238 | case AMDGPU::DS_READ2ST64_B32: |
239 | case AMDGPU::DS_READ2ST64_B64: |
240 | case AMDGPU::DS_WRITE2ST64_B32: |
241 | case AMDGPU::DS_WRITE2ST64_B64: |
242 | return true; |
243 | default: |
244 | return false; |
245 | } |
246 | } |
247 | |
248 | bool SIInstrInfo::getMemOperandsWithOffsetWidth( |
249 | const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, |
250 | int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, |
251 | const TargetRegisterInfo *TRI) const { |
252 | if (!LdSt.mayLoadOrStore()) |
253 | return false; |
254 | |
255 | unsigned Opc = LdSt.getOpcode(); |
256 | OffsetIsScalable = false; |
257 | const MachineOperand *BaseOp, *OffsetOp; |
258 | int DataOpIdx; |
259 | |
260 | if (isDS(LdSt)) { |
261 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr); |
262 | OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); |
263 | if (OffsetOp) { |
264 | |
265 | if (!BaseOp) { |
266 | |
267 | |
268 | return false; |
269 | } |
270 | BaseOps.push_back(BaseOp); |
271 | Offset = OffsetOp->getImm(); |
272 | |
273 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
274 | if (DataOpIdx == -1) |
275 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); |
276 | Width = getOpSize(LdSt, DataOpIdx); |
277 | } else { |
278 | |
279 | |
280 | |
281 | const MachineOperand *Offset0Op = |
282 | getNamedOperand(LdSt, AMDGPU::OpName::offset0); |
283 | const MachineOperand *Offset1Op = |
284 | getNamedOperand(LdSt, AMDGPU::OpName::offset1); |
285 | |
286 | unsigned Offset0 = Offset0Op->getImm(); |
287 | unsigned Offset1 = Offset1Op->getImm(); |
288 | if (Offset0 + 1 != Offset1) |
289 | return false; |
290 | |
291 | |
292 | |
293 | |
294 | unsigned EltSize; |
295 | if (LdSt.mayLoad()) |
296 | EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; |
297 | else { |
298 | assert(LdSt.mayStore()); |
299 | int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); |
300 | EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; |
301 | } |
302 | |
303 | if (isStride64(Opc)) |
304 | EltSize *= 64; |
305 | |
306 | BaseOps.push_back(BaseOp); |
307 | Offset = EltSize * Offset0; |
308 | |
309 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
310 | if (DataOpIdx == -1) { |
311 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); |
312 | Width = getOpSize(LdSt, DataOpIdx); |
313 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1); |
314 | Width += getOpSize(LdSt, DataOpIdx); |
315 | } else { |
316 | Width = getOpSize(LdSt, DataOpIdx); |
317 | } |
318 | } |
319 | return true; |
320 | } |
321 | |
322 | if (isMUBUF(LdSt) || isMTBUF(LdSt)) { |
323 | const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); |
324 | if (!RSrc) |
325 | return false; |
326 | BaseOps.push_back(RSrc); |
327 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); |
328 | if (BaseOp && !BaseOp->isFI()) |
329 | BaseOps.push_back(BaseOp); |
330 | const MachineOperand *OffsetImm = |
331 | getNamedOperand(LdSt, AMDGPU::OpName::offset); |
332 | Offset = OffsetImm->getImm(); |
333 | const MachineOperand *SOffset = |
334 | getNamedOperand(LdSt, AMDGPU::OpName::soffset); |
335 | if (SOffset) { |
336 | if (SOffset->isReg()) |
337 | BaseOps.push_back(SOffset); |
338 | else |
339 | Offset += SOffset->getImm(); |
340 | } |
341 | |
342 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
343 | if (DataOpIdx == -1) |
344 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); |
345 | Width = getOpSize(LdSt, DataOpIdx); |
346 | return true; |
347 | } |
348 | |
349 | if (isMIMG(LdSt)) { |
350 | int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); |
351 | BaseOps.push_back(&LdSt.getOperand(SRsrcIdx)); |
352 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); |
353 | if (VAddr0Idx >= 0) { |
354 | |
355 | for (int I = VAddr0Idx; I < SRsrcIdx; ++I) |
356 | BaseOps.push_back(&LdSt.getOperand(I)); |
357 | } else { |
358 | BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr)); |
359 | } |
360 | Offset = 0; |
361 | |
362 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); |
363 | Width = getOpSize(LdSt, DataOpIdx); |
364 | return true; |
365 | } |
366 | |
367 | if (isSMRD(LdSt)) { |
368 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::sbase); |
369 | if (!BaseOp) |
370 | return false; |
371 | BaseOps.push_back(BaseOp); |
372 | OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); |
373 | Offset = OffsetOp ? OffsetOp->getImm() : 0; |
374 | |
375 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); |
376 | Width = getOpSize(LdSt, DataOpIdx); |
377 | return true; |
378 | } |
379 | |
380 | if (isFLAT(LdSt)) { |
381 | |
382 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); |
383 | if (BaseOp) |
384 | BaseOps.push_back(BaseOp); |
385 | BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr); |
386 | if (BaseOp) |
387 | BaseOps.push_back(BaseOp); |
388 | Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); |
389 | |
390 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
391 | if (DataOpIdx == -1) |
392 | DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); |
393 | Width = getOpSize(LdSt, DataOpIdx); |
394 | return true; |
395 | } |
396 | |
397 | return false; |
398 | } |
399 | |
400 | static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, |
401 | ArrayRef<const MachineOperand *> BaseOps1, |
402 | const MachineInstr &MI2, |
403 | ArrayRef<const MachineOperand *> BaseOps2) { |
404 | |
405 | |
406 | |
407 | if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) |
408 | return true; |
409 | |
410 | if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) |
411 | return false; |
412 | |
413 | auto MO1 = *MI1.memoperands_begin(); |
414 | auto MO2 = *MI2.memoperands_begin(); |
415 | if (MO1->getAddrSpace() != MO2->getAddrSpace()) |
416 | return false; |
417 | |
418 | auto Base1 = MO1->getValue(); |
419 | auto Base2 = MO2->getValue(); |
420 | if (!Base1 || !Base2) |
421 | return false; |
422 | Base1 = getUnderlyingObject(Base1); |
423 | Base2 = getUnderlyingObject(Base2); |
424 | |
425 | if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) |
426 | return false; |
427 | |
428 | return Base1 == Base2; |
429 | } |
430 | |
431 | bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, |
432 | ArrayRef<const MachineOperand *> BaseOps2, |
433 | unsigned NumLoads, |
434 | unsigned NumBytes) const { |
435 | |
436 | |
437 | if (!BaseOps1.empty() && !BaseOps2.empty()) { |
438 | const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); |
439 | const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); |
440 | if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) |
441 | return false; |
442 | } else if (!BaseOps1.empty() || !BaseOps2.empty()) { |
443 | |
444 | return false; |
445 | } |
446 | |
447 | |
448 | |
449 | |
450 | |
451 | |
452 | |
453 | |
454 | |
455 | |
456 | |
457 | |
458 | |
459 | const unsigned LoadSize = NumBytes / NumLoads; |
460 | const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads; |
461 | return NumDWORDs <= 8; |
462 | } |
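The decision at lines 459-461 reduces to: round each load up to whole dwords and allow clustering only while the combined width stays within eight dwords. The helper below is a hypothetical restatement of that arithmetic, not part of this file, kept standalone purely to show a few worked cases.

#include <cassert>

// Hypothetical restatement of the arithmetic at lines 459-461.
bool wouldCluster(unsigned NumLoads, unsigned NumBytes) {
  const unsigned LoadSize = NumBytes / NumLoads;              // bytes per load
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads; // round each load up to dwords
  return NumDWORDs <= 8;                                      // cap the cluster at 8 dwords
}

int main() {
  assert(wouldCluster(2, 8));    // two 4-byte loads   -> 2 dwords, clustered
  assert(wouldCluster(2, 16));   // two 8-byte loads   -> 4 dwords, clustered
  assert(!wouldCluster(4, 64));  // four 16-byte loads -> 16 dwords, rejected
  assert(wouldCluster(8, 8));    // eight 1-byte loads -> 8 dwords, clustered
}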
463 | |
464 | |
465 | |
466 | |
467 | |
468 | |
469 | |
470 | |
471 | |
472 | |
473 | |
474 | bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, |
475 | int64_t Offset0, int64_t Offset1, |
476 | unsigned NumLoads) const { |
477 | assert(Offset1 > Offset0 && |
478 | "Second offset should be larger than first offset!"); |
479 | |
480 | |
481 | |
482 | |
483 | return (NumLoads <= 16 && (Offset1 - Offset0) < 64); |
484 | } |
485 | |
486 | static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, |
487 | MachineBasicBlock::iterator MI, |
488 | const DebugLoc &DL, MCRegister DestReg, |
489 | MCRegister SrcReg, bool KillSrc, |
490 | const char *Msg = "illegal SGPR to VGPR copy") { |
491 | MachineFunction *MF = MBB.getParent(); |
492 | DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error); |
493 | LLVMContext &C = MF->getFunction().getContext(); |
494 | C.diagnose(IllegalCopy); |
495 | |
496 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg) |
497 | .addReg(SrcReg, getKillRegState(KillSrc)); |
498 | } |
499 | |
500 | |
501 | |
502 | static void indirectCopyToAGPR(const SIInstrInfo &TII, |
503 | MachineBasicBlock &MBB, |
504 | MachineBasicBlock::iterator MI, |
505 | const DebugLoc &DL, MCRegister DestReg, |
506 | MCRegister SrcReg, bool KillSrc, |
507 | RegScavenger &RS, |
508 | Register ImpDefSuperReg = Register(), |
509 | Register ImpUseSuperReg = Register()) { |
510 | const SIRegisterInfo &RI = TII.getRegisterInfo(); |
511 | |
512 | assert(AMDGPU::SReg_32RegClass.contains(SrcReg) || |
513 | AMDGPU::AGPR_32RegClass.contains(SrcReg)); |
514 | |
515 | |
516 | for (auto Def = MI, E = MBB.begin(); Def != E; ) { |
517 | --Def; |
518 | if (!Def->definesRegister(SrcReg, &RI)) |
519 | continue; |
520 | if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) |
521 | break; |
522 | |
523 | MachineOperand &DefOp = Def->getOperand(1); |
524 | assert(DefOp.isReg() || DefOp.isImm()); |
525 | |
526 | if (DefOp.isReg()) { |
527 | |
528 | |
529 | bool SafeToPropagate = true; |
530 | for (auto I = Def; I != MI && SafeToPropagate; ++I) |
531 | if (I->modifiesRegister(DefOp.getReg(), &RI)) |
532 | SafeToPropagate = false; |
533 | |
534 | if (!SafeToPropagate) |
535 | break; |
536 | |
537 | DefOp.setIsKill(false); |
538 | } |
539 | |
540 | MachineInstrBuilder Builder = |
541 | BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) |
542 | .add(DefOp); |
543 | if (ImpDefSuperReg) |
544 | Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); |
545 | |
546 | if (ImpUseSuperReg) { |
547 | Builder.addReg(ImpUseSuperReg, |
548 | getKillRegState(KillSrc) | RegState::Implicit); |
549 | } |
550 | |
551 | return; |
552 | } |
553 | |
554 | RS.enterBasicBlock(MBB); |
555 | RS.forward(MI); |
556 | |
557 | |
558 | |
559 | unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass, |
560 | *MBB.getParent()); |
561 | |
562 | |
563 | |
564 | unsigned RegNo = DestReg % 3; |
565 | Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); |
566 | if (!Tmp) |
567 | report_fatal_error("Cannot scavenge VGPR to copy to AGPR"); |
568 | RS.setRegUsed(Tmp); |
569 | |
570 | if (!TII.getSubtarget().hasGFX90AInsts()) { |
571 | |
572 | |
573 | |
574 | while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) { |
575 | Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); |
576 | if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) |
577 | break; |
578 | Tmp = Tmp2; |
579 | RS.setRegUsed(Tmp); |
580 | } |
581 | } |
582 | |
583 | |
584 | unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32; |
585 | if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) { |
586 | TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64; |
587 | } else { |
588 | assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); |
589 | } |
590 | |
591 | MachineInstrBuilder UseBuilder = BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp) |
592 | .addReg(SrcReg, getKillRegState(KillSrc)); |
593 | if (ImpUseSuperReg) { |
594 | UseBuilder.addReg(ImpUseSuperReg, |
595 | getKillRegState(KillSrc) | RegState::Implicit); |
596 | } |
597 | |
598 | MachineInstrBuilder DefBuilder |
599 | = BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) |
600 | .addReg(Tmp, RegState::Kill); |
601 | |
602 | if (ImpDefSuperReg) |
603 | DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); |
604 | } |
605 | |
606 | static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, |
607 | MachineBasicBlock::iterator MI, const DebugLoc &DL, |
608 | MCRegister DestReg, MCRegister SrcReg, bool KillSrc, |
609 | const TargetRegisterClass *RC, bool Forward) { |
610 | const SIRegisterInfo &RI = TII.getRegisterInfo(); |
611 | ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4); |
612 | MachineBasicBlock::iterator I = MI; |
613 | MachineInstr *FirstMI = nullptr, *LastMI = nullptr; |
| 18 | 'FirstMI' initialized to a null pointer value |
|
614 | |
615 | for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) { |
| 19 | Assuming the condition is false |
|
| 20 | Loop condition is false. Execution continues on line 643 |
|
616 | int16_t SubIdx = BaseIndices[Idx]; |
617 | Register Reg = RI.getSubReg(DestReg, SubIdx); |
618 | unsigned Opcode = AMDGPU::S_MOV_B32; |
619 | |
620 | |
621 | Register Src = RI.getSubReg(SrcReg, SubIdx); |
622 | bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0; |
623 | bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0; |
624 | if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) { |
625 | |
626 | unsigned Channel = RI.getChannelFromSubReg(SubIdx); |
627 | SubIdx = RI.getSubRegFromChannel(Channel, 2); |
628 | Opcode = AMDGPU::S_MOV_B64; |
629 | Idx++; |
630 | } |
631 | |
632 | LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx)) |
633 | .addReg(RI.getSubReg(SrcReg, SubIdx)) |
634 | .addReg(SrcReg, RegState::Implicit); |
635 | |
636 | if (!FirstMI) |
637 | FirstMI = LastMI; |
638 | |
639 | if (!Forward) |
640 | I--; |
641 | } |
642 | |
643 | assert(FirstMI && LastMI); |
644 | if (!Forward) |
| 21 | Assuming 'Forward' is true |
|
645 | std::swap(FirstMI, LastMI); |
646 | |
647 | FirstMI->addOperand( |
| 23 | Called C++ object pointer is null |
|
648 | MachineOperand::CreateReg(DestReg, true , true )); |
649 | |
650 | if (KillSrc) |
651 | LastMI->addRegisterKilled(SrcReg, &RI); |
652 | } |
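The path reported in steps 18-23 above is the zero-iteration case of the loop at line 615: FirstMI keeps the null value assigned at line 613, the assert at line 643 is compiled out because the command at the top of this report passes -D NDEBUG, and the call at line 647 then goes through a null FirstMI. The standalone sketch below reproduces only that shape; it is an illustration, not LLVM code, and every name in it is made up. Whether RI.getRegSplitParts() can actually return an empty list for an SGPR register class is not something this report establishes; the analyzer only assumes the loop condition can be false (step 19).

#include <cassert>

struct Instr {
  int NumOperands = 0;
  void addOperand(int) { ++NumOperands; }   // writes through 'this'
};

static Instr InstrPool[32];                 // stand-in for the BuildMI(...) results

void expandCopySketch(unsigned NumParts) {
  Instr *FirstMI = nullptr, *LastMI = nullptr;     // cf. line 613
  for (unsigned Idx = 0; Idx < NumParts; ++Idx) {  // cf. line 615: may run zero times
    LastMI = &InstrPool[Idx];                      // "emit" one sub-copy
    if (!FirstMI)
      FirstMI = LastMI;
  }
  assert(FirstMI && LastMI);  // no-op here: the reproduction command passes -D NDEBUG
  FirstMI->addOperand(0);     // cf. line 647: null dereference when NumParts == 0
  (void)LastMI;
}

int main() {
  expandCopySketch(0);        // zero-iteration loop follows the reported path
}

Built with -DNDEBUG the sketch writes through a null pointer; without NDEBUG the assert fires first, and that assert is the only guard the original function has on this path.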
653 | |
654 | void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
655 | MachineBasicBlock::iterator MI, |
656 | const DebugLoc &DL, MCRegister DestReg, |
657 | MCRegister SrcReg, bool KillSrc) const { |
658 | const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg); |
659 | |
660 | |
661 | |
662 | if (Fix16BitCopies && |
| 1 | Assuming the condition is false |
|
663 | ((RI.getRegSizeInBits(*RC) == 16) ^ |
664 | (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) { |
665 | MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg; |
666 | MCRegister Super = RI.get32BitRegister(RegToFix); |
667 | assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix); |
668 | RegToFix = Super; |
669 | |
670 | if (DestReg == SrcReg) { |
671 | |
672 | BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE)); |
673 | return; |
674 | } |
675 | |
676 | RC = RI.getPhysRegClass(DestReg); |
677 | } |
678 | |
679 | if (RC == &AMDGPU::VGPR_32RegClass) { |
| 2 | Assuming the condition is false |
|
680 | assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || |
681 | AMDGPU::SReg_32RegClass.contains(SrcReg) || |
682 | AMDGPU::AGPR_32RegClass.contains(SrcReg)); |
683 | unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ? |
684 | AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32; |
685 | BuildMI(MBB, MI, DL, get(Opc), DestReg) |
686 | .addReg(SrcReg, getKillRegState(KillSrc)); |
687 | return; |
688 | } |
689 | |
690 | if (RC == &AMDGPU::SReg_32_XM0RegClass || |
| 4 | Assuming the condition is false |
|
691 | RC == &AMDGPU::SReg_32RegClass) { |
| 5 | Assuming the condition is false |
|
692 | if (SrcReg == AMDGPU::SCC) { |
693 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg) |
694 | .addImm(1) |
695 | .addImm(0); |
696 | return; |
697 | } |
698 | |
699 | if (DestReg == AMDGPU::VCC_LO) { |
700 | if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { |
701 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) |
702 | .addReg(SrcReg, getKillRegState(KillSrc)); |
703 | } else { |
704 | |
705 | assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); |
706 | BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) |
707 | .addImm(0) |
708 | .addReg(SrcReg, getKillRegState(KillSrc)); |
709 | } |
710 | |
711 | return; |
712 | } |
713 | |
714 | if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { |
715 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
716 | return; |
717 | } |
718 | |
719 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) |
720 | .addReg(SrcReg, getKillRegState(KillSrc)); |
721 | return; |
722 | } |
723 | |
724 | if (RC == &AMDGPU::SReg_64RegClass) { |
| 7 | Assuming the condition is false |
|
725 | if (SrcReg == AMDGPU::SCC) { |
726 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B64), DestReg) |
727 | .addImm(1) |
728 | .addImm(0); |
729 | return; |
730 | } |
731 | |
732 | if (DestReg == AMDGPU::VCC) { |
733 | if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { |
734 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) |
735 | .addReg(SrcReg, getKillRegState(KillSrc)); |
736 | } else { |
737 | |
738 | assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); |
739 | BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) |
740 | .addImm(0) |
741 | .addReg(SrcReg, getKillRegState(KillSrc)); |
742 | } |
743 | |
744 | return; |
745 | } |
746 | |
747 | if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) { |
748 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
749 | return; |
750 | } |
751 | |
752 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) |
753 | .addReg(SrcReg, getKillRegState(KillSrc)); |
754 | return; |
755 | } |
756 | |
757 | if (DestReg == AMDGPU::SCC) { |
758 | |
759 | |
760 | if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { |
761 | |
762 | |
763 | |
764 | assert(ST.hasScalarCompareEq64()); |
765 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U64)) |
766 | .addReg(SrcReg, getKillRegState(KillSrc)) |
767 | .addImm(0); |
768 | } else { |
769 | assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); |
770 | BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32)) |
771 | .addReg(SrcReg, getKillRegState(KillSrc)) |
772 | .addImm(0); |
773 | } |
774 | |
775 | return; |
776 | } |
777 | |
778 | if (RC == &AMDGPU::AGPR_32RegClass) { |
| 10 | Assuming the condition is false |
|
779 | if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) { |
780 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) |
781 | .addReg(SrcReg, getKillRegState(KillSrc)); |
782 | return; |
783 | } |
784 | |
785 | if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) { |
786 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_MOV_B32), DestReg) |
787 | .addReg(SrcReg, getKillRegState(KillSrc)); |
788 | return; |
789 | } |
790 | |
791 | |
792 | |
793 | RegScavenger RS; |
794 | indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS); |
795 | return; |
796 | } |
797 | |
798 | const unsigned Size = RI.getRegSizeInBits(*RC); |
799 | if (Size == 16) { |
| 12 | Assuming 'Size' is not equal to 16 |
|
800 | assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || |
801 | AMDGPU::VGPR_HI16RegClass.contains(SrcReg) || |
802 | AMDGPU::SReg_LO16RegClass.contains(SrcReg) || |
803 | AMDGPU::AGPR_LO16RegClass.contains(SrcReg)); |
804 | |
805 | bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg); |
806 | bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg); |
807 | bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg); |
808 | bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg); |
809 | bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) || |
810 | AMDGPU::SReg_LO16RegClass.contains(DestReg) || |
811 | AMDGPU::AGPR_LO16RegClass.contains(DestReg); |
812 | bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || |
813 | AMDGPU::SReg_LO16RegClass.contains(SrcReg) || |
814 | AMDGPU::AGPR_LO16RegClass.contains(SrcReg); |
815 | MCRegister NewDestReg = RI.get32BitRegister(DestReg); |
816 | MCRegister NewSrcReg = RI.get32BitRegister(SrcReg); |
817 | |
818 | if (IsSGPRDst) { |
819 | if (!IsSGPRSrc) { |
820 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
821 | return; |
822 | } |
823 | |
824 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg) |
825 | .addReg(NewSrcReg, getKillRegState(KillSrc)); |
826 | return; |
827 | } |
828 | |
829 | if (IsAGPRDst || IsAGPRSrc) { |
830 | if (!DstLow || !SrcLow) { |
831 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, |
832 | "Cannot use hi16 subreg with an AGPR!"); |
833 | } |
834 | |
835 | copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc); |
836 | return; |
837 | } |
838 | |
839 | if (IsSGPRSrc && !ST.hasSDWAScalar()) { |
840 | if (!DstLow || !SrcLow) { |
841 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, |
842 | "Cannot use hi16 subreg on VI!"); |
843 | } |
844 | |
845 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg) |
846 | .addReg(NewSrcReg, getKillRegState(KillSrc)); |
847 | return; |
848 | } |
849 | |
850 | auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg) |
851 | .addImm(0) |
852 | .addReg(NewSrcReg) |
853 | .addImm(0) |
854 | .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0 |
855 | : AMDGPU::SDWA::SdwaSel::WORD_1) |
856 | .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) |
857 | .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0 |
858 | : AMDGPU::SDWA::SdwaSel::WORD_1) |
859 | .addReg(NewDestReg, RegState::Implicit | RegState::Undef); |
860 | |
861 | MIB->tieOperands(0, MIB->getNumOperands() - 1); |
862 | return; |
863 | } |
864 | |
865 | const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg); |
866 | if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) { |
| 14 | Assuming the condition is false |
|
867 | if (ST.hasPackedFP32Ops()) { |
868 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg) |
869 | .addImm(SISrcMods::OP_SEL_1) |
870 | .addReg(SrcReg) |
871 | .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) |
872 | .addReg(SrcReg) |
873 | .addImm(0) |
874 | .addImm(0) |
875 | .addImm(0) |
876 | .addImm(0) |
877 | .addImm(0) |
878 | .addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); |
879 | return; |
880 | } |
881 | } |
882 | |
883 | const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); |
884 | if (RI.isSGPRClass(RC)) { |
885 | if (!RI.isSGPRClass(SrcRC)) { |
886 | reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); |
887 | return; |
888 | } |
889 | expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward); |
| 17 | Calling 'expandSGPRCopy' |
|
890 | return; |
891 | } |
892 | |
893 | unsigned EltSize = 4; |
894 | unsigned Opcode = AMDGPU::V_MOV_B32_e32; |
895 | if (RI.hasAGPRs(RC)) { |
896 | Opcode = (RI.hasVGPRs(SrcRC)) ? |
897 | AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END; |
898 | } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) { |
899 | Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64; |
900 | } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) && |
901 | (RI.isProperlyAlignedRC(*RC) && |
902 | (SrcRC == RC || RI.isSGPRClass(SrcRC)))) { |
903 | |
904 | if (ST.hasPackedFP32Ops()) { |
905 | Opcode = AMDGPU::V_PK_MOV_B32; |
906 | EltSize = 8; |
907 | } |
908 | } |
909 | |
910 | |
911 | |
912 | |
913 | |
914 | |
915 | std::unique_ptr<RegScavenger> RS; |
916 | if (Opcode == AMDGPU::INSTRUCTION_LIST_END) |
917 | RS.reset(new RegScavenger()); |
918 | |
919 | ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); |
920 | |
921 | |
922 | |
923 | const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); |
924 | |
925 | for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { |
926 | unsigned SubIdx; |
927 | if (Forward) |
928 | SubIdx = SubIndices[Idx]; |
929 | else |
930 | SubIdx = SubIndices[SubIndices.size() - Idx - 1]; |
931 | |
932 | bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; |
933 | |
934 | if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { |
935 | Register ImpDefSuper = Idx == 0 ? Register(DestReg) : Register(); |
936 | Register ImpUseSuper = SrcReg; |
937 | indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), |
938 | RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, |
939 | ImpDefSuper, ImpUseSuper); |
940 | } else if (Opcode == AMDGPU::V_PK_MOV_B32) { |
941 | Register DstSubReg = RI.getSubReg(DestReg, SubIdx); |
942 | Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); |
943 | MachineInstrBuilder MIB = |
944 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg) |
945 | .addImm(SISrcMods::OP_SEL_1) |
946 | .addReg(SrcSubReg) |
947 | .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) |
948 | .addReg(SrcSubReg) |
949 | .addImm(0) |
950 | .addImm(0) |
951 | .addImm(0) |
952 | .addImm(0) |
953 | .addImm(0) |
954 | .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); |
955 | if (Idx == 0) |
956 | MIB.addReg(DestReg, RegState::Define | RegState::Implicit); |
957 | } else { |
958 | MachineInstrBuilder Builder = |
959 | BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)) |
960 | .addReg(RI.getSubReg(SrcReg, SubIdx)); |
961 | if (Idx == 0) |
962 | Builder.addReg(DestReg, RegState::Define | RegState::Implicit); |
963 | |
964 | Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); |
965 | } |
966 | } |
967 | } |
968 | |
969 | int SIInstrInfo::commuteOpcode(unsigned Opcode) const { |
970 | int NewOpc; |
971 | |
972 | |
973 | NewOpc = AMDGPU::getCommuteRev(Opcode); |
974 | if (NewOpc != -1) |
975 | |
976 | return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; |
977 | |
978 | |
979 | NewOpc = AMDGPU::getCommuteOrig(Opcode); |
980 | if (NewOpc != -1) |
981 | |
982 | return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; |
983 | |
984 | return Opcode; |
985 | } |
986 | |
987 | void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, |
988 | MachineBasicBlock::iterator MI, |
989 | const DebugLoc &DL, unsigned DestReg, |
990 | int64_t Value) const { |
991 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
992 | const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); |
993 | if (RegClass == &AMDGPU::SReg_32RegClass || |
994 | RegClass == &AMDGPU::SGPR_32RegClass || |
995 | RegClass == &AMDGPU::SReg_32_XM0RegClass || |
996 | RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { |
997 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) |
998 | .addImm(Value); |
999 | return; |
1000 | } |
1001 | |
1002 | if (RegClass == &AMDGPU::SReg_64RegClass || |
1003 | RegClass == &AMDGPU::SGPR_64RegClass || |
1004 | RegClass == &AMDGPU::SReg_64_XEXECRegClass) { |
1005 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) |
1006 | .addImm(Value); |
1007 | return; |
1008 | } |
1009 | |
1010 | if (RegClass == &AMDGPU::VGPR_32RegClass) { |
1011 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) |
1012 | .addImm(Value); |
1013 | return; |
1014 | } |
1015 | if (RegClass->hasSuperClassEq(&AMDGPU::VReg_64RegClass)) { |
1016 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) |
1017 | .addImm(Value); |
1018 | return; |
1019 | } |
1020 | |
1021 | unsigned EltSize = 4; |
1022 | unsigned Opcode = AMDGPU::V_MOV_B32_e32; |
1023 | if (RI.isSGPRClass(RegClass)) { |
1024 | if (RI.getRegSizeInBits(*RegClass) > 32) { |
1025 | Opcode = AMDGPU::S_MOV_B64; |
1026 | EltSize = 8; |
1027 | } else { |
1028 | Opcode = AMDGPU::S_MOV_B32; |
1029 | EltSize = 4; |
1030 | } |
1031 | } |
1032 | |
1033 | ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize); |
1034 | for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { |
1035 | int64_t IdxValue = Idx == 0 ? Value : 0; |
1036 | |
1037 | MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, |
1038 | get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx])); |
1039 | Builder.addImm(IdxValue); |
1040 | } |
1041 | } |
1042 | |
1043 | const TargetRegisterClass * |
1044 | SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { |
1045 | return &AMDGPU::VGPR_32RegClass; |
1046 | } |
1047 | |
1048 | void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, |
1049 | MachineBasicBlock::iterator I, |
1050 | const DebugLoc &DL, Register DstReg, |
1051 | ArrayRef<MachineOperand> Cond, |
1052 | Register TrueReg, |
1053 | Register FalseReg) const { |
1054 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
1055 | const TargetRegisterClass *BoolXExecRC = |
1056 | RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
1057 | assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && |
1058 | "Not a VGPR32 reg"); |
1059 | |
1060 | if (Cond.size() == 1) { |
1061 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1062 | BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) |
1063 | .add(Cond[0]); |
1064 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1065 | .addImm(0) |
1066 | .addReg(FalseReg) |
1067 | .addImm(0) |
1068 | .addReg(TrueReg) |
1069 | .addReg(SReg); |
1070 | } else if (Cond.size() == 2) { |
1071 | assert(Cond[0].isImm() && "Cond[0] is not an immediate"); |
1072 | switch (Cond[0].getImm()) { |
1073 | case SIInstrInfo::SCC_TRUE: { |
1074 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1075 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1076 | : AMDGPU::S_CSELECT_B64), SReg) |
1077 | .addImm(1) |
1078 | .addImm(0); |
1079 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1080 | .addImm(0) |
1081 | .addReg(FalseReg) |
1082 | .addImm(0) |
1083 | .addReg(TrueReg) |
1084 | .addReg(SReg); |
1085 | break; |
1086 | } |
1087 | case SIInstrInfo::SCC_FALSE: { |
1088 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1089 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1090 | : AMDGPU::S_CSELECT_B64), SReg) |
1091 | .addImm(0) |
1092 | .addImm(1); |
1093 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1094 | .addImm(0) |
1095 | .addReg(FalseReg) |
1096 | .addImm(0) |
1097 | .addReg(TrueReg) |
1098 | .addReg(SReg); |
1099 | break; |
1100 | } |
1101 | case SIInstrInfo::VCCNZ: { |
1102 | MachineOperand RegOp = Cond[1]; |
1103 | RegOp.setImplicit(false); |
1104 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1105 | BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) |
1106 | .add(RegOp); |
1107 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1108 | .addImm(0) |
1109 | .addReg(FalseReg) |
1110 | .addImm(0) |
1111 | .addReg(TrueReg) |
1112 | .addReg(SReg); |
1113 | break; |
1114 | } |
1115 | case SIInstrInfo::VCCZ: { |
1116 | MachineOperand RegOp = Cond[1]; |
1117 | RegOp.setImplicit(false); |
1118 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1119 | BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) |
1120 | .add(RegOp); |
1121 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1122 | .addImm(0) |
1123 | .addReg(TrueReg) |
1124 | .addImm(0) |
1125 | .addReg(FalseReg) |
1126 | .addReg(SReg); |
1127 | break; |
1128 | } |
1129 | case SIInstrInfo::EXECNZ: { |
1130 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1131 | Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); |
1132 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 |
1133 | : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) |
1134 | .addImm(0); |
1135 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1136 | : AMDGPU::S_CSELECT_B64), SReg) |
1137 | .addImm(1) |
1138 | .addImm(0); |
1139 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1140 | .addImm(0) |
1141 | .addReg(FalseReg) |
1142 | .addImm(0) |
1143 | .addReg(TrueReg) |
1144 | .addReg(SReg); |
1145 | break; |
1146 | } |
1147 | case SIInstrInfo::EXECZ: { |
1148 | Register SReg = MRI.createVirtualRegister(BoolXExecRC); |
1149 | Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); |
1150 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 |
1151 | : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) |
1152 | .addImm(0); |
1153 | BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 |
1154 | : AMDGPU::S_CSELECT_B64), SReg) |
1155 | .addImm(0) |
1156 | .addImm(1); |
1157 | BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) |
1158 | .addImm(0) |
1159 | .addReg(FalseReg) |
1160 | .addImm(0) |
1161 | .addReg(TrueReg) |
1162 | .addReg(SReg); |
1163 | llvm_unreachable("Unhandled branch predicate EXECZ"); |
1164 | break; |
1165 | } |
1166 | default: |
1167 | llvm_unreachable("invalid branch predicate"); |
1168 | } |
1169 | } else { |
1170 | llvm_unreachable("Can only handle Cond size 1 or 2"); |
1171 | } |
1172 | } |
1173 | |
1174 | Register SIInstrInfo::insertEQ(MachineBasicBlock *MBB, |
1175 | MachineBasicBlock::iterator I, |
1176 | const DebugLoc &DL, |
1177 | Register SrcReg, int Value) const { |
1178 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
1179 | Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); |
1180 | BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) |
1181 | .addImm(Value) |
1182 | .addReg(SrcReg); |
1183 | |
1184 | return Reg; |
1185 | } |
1186 | |
1187 | Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, |
1188 | MachineBasicBlock::iterator I, |
1189 | const DebugLoc &DL, |
1190 | Register SrcReg, int Value) const { |
1191 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
1192 | Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); |
1193 | BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) |
1194 | .addImm(Value) |
1195 | .addReg(SrcReg); |
1196 | |
1197 | return Reg; |
1198 | } |
1199 | |
1200 | unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { |
1201 | |
1202 | if (RI.hasAGPRs(DstRC)) |
1203 | return AMDGPU::COPY; |
1204 | if (RI.getRegSizeInBits(*DstRC) == 32) { |
1205 | return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; |
1206 | } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { |
1207 | return AMDGPU::S_MOV_B64; |
1208 | } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { |
1209 | return AMDGPU::V_MOV_B64_PSEUDO; |
1210 | } |
1211 | return AMDGPU::COPY; |
1212 | } |
1213 | |
1214 | const MCInstrDesc & |
1215 | SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize, |
1216 | bool IsIndirectSrc) const { |
1217 | if (IsIndirectSrc) { |
1218 | if (VecSize <= 32) |
1219 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1); |
1220 | if (VecSize <= 64) |
1221 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2); |
1222 | if (VecSize <= 96) |
1223 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3); |
1224 | if (VecSize <= 128) |
1225 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4); |
1226 | if (VecSize <= 160) |
1227 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5); |
1228 | if (VecSize <= 256) |
1229 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8); |
1230 | if (VecSize <= 512) |
1231 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16); |
1232 | if (VecSize <= 1024) |
1233 | return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32); |
1234 | |
1235 | llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos"); |
1236 | } |
1237 | |
1238 | if (VecSize <= 32) |
1239 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1); |
1240 | if (VecSize <= 64) |
1241 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2); |
1242 | if (VecSize <= 96) |
1243 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3); |
1244 | if (VecSize <= 128) |
1245 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4); |
1246 | if (VecSize <= 160) |
1247 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5); |
1248 | if (VecSize <= 256) |
1249 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8); |
1250 | if (VecSize <= 512) |
1251 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16); |
1252 | if (VecSize <= 1024) |
1253 | return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32); |
1254 | |
1255 | llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos"); |
1256 | } |
1257 | |
1258 | static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) { |
1259 | if (VecSize <= 32) |
1260 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1; |
1261 | if (VecSize <= 64) |
1262 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2; |
1263 | if (VecSize <= 96) |
1264 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3; |
1265 | if (VecSize <= 128) |
1266 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4; |
1267 | if (VecSize <= 160) |
1268 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5; |
1269 | if (VecSize <= 256) |
1270 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8; |
1271 | if (VecSize <= 512) |
1272 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16; |
1273 | if (VecSize <= 1024) |
1274 | return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32; |
1275 | |
1276 | llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); |
1277 | } |
1278 | |
1279 | static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) { |
1280 | if (VecSize <= 32) |
1281 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1; |
1282 | if (VecSize <= 64) |
1283 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2; |
1284 | if (VecSize <= 96) |
1285 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3; |
1286 | if (VecSize <= 128) |
1287 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4; |
1288 | if (VecSize <= 160) |
1289 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5; |
1290 | if (VecSize <= 256) |
1291 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8; |
1292 | if (VecSize <= 512) |
1293 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16; |
1294 | if (VecSize <= 1024) |
1295 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32; |
1296 | |
1297 | llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); |
1298 | } |
1299 | |
1300 | static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize) { |
1301 | if (VecSize <= 64) |
1302 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1; |
1303 | if (VecSize <= 128) |
1304 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2; |
1305 | if (VecSize <= 256) |
1306 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4; |
1307 | if (VecSize <= 512) |
1308 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8; |
1309 | if (VecSize <= 1024) |
1310 | return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16; |
1311 | |
1312 | llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); |
1313 | } |
1314 | |
1315 | const MCInstrDesc & |
1316 | SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, |
1317 | bool IsSGPR) const { |
1318 | if (IsSGPR) { |
1319 | switch (EltSize) { |
1320 | case 32: |
1321 | return get(getIndirectSGPRWriteMovRelPseudo32(VecSize)); |
1322 | case 64: |
1323 | return get(getIndirectSGPRWriteMovRelPseudo64(VecSize)); |
1324 | default: |
1325 | llvm_unreachable("invalid reg indexing elt size"); |
1326 | } |
1327 | } |
1328 | |
1329 | assert(EltSize == 32 && "invalid reg indexing elt size"); |
1330 | return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize)); |
1331 | } |
1332 | |
1333 | static unsigned getSGPRSpillSaveOpcode(unsigned Size) { |
1334 | switch (Size) { |
1335 | case 4: |
1336 | return AMDGPU::SI_SPILL_S32_SAVE; |
1337 | case 8: |
1338 | return AMDGPU::SI_SPILL_S64_SAVE; |
1339 | case 12: |
1340 | return AMDGPU::SI_SPILL_S96_SAVE; |
1341 | case 16: |
1342 | return AMDGPU::SI_SPILL_S128_SAVE; |
1343 | case 20: |
1344 | return AMDGPU::SI_SPILL_S160_SAVE; |
1345 | case 24: |
1346 | return AMDGPU::SI_SPILL_S192_SAVE; |
1347 | case 28: |
1348 | return AMDGPU::SI_SPILL_S224_SAVE; |
1349 | case 32: |
1350 | return AMDGPU::SI_SPILL_S256_SAVE; |
1351 | case 64: |
1352 | return AMDGPU::SI_SPILL_S512_SAVE; |
1353 | case 128: |
1354 | return AMDGPU::SI_SPILL_S1024_SAVE; |
1355 | default: |
1356 | llvm_unreachable("unknown register size"); |
1357 | } |
1358 | } |
1359 | |
1360 | static unsigned getVGPRSpillSaveOpcode(unsigned Size) { |
1361 | switch (Size) { |
1362 | case 4: |
1363 | return AMDGPU::SI_SPILL_V32_SAVE; |
1364 | case 8: |
1365 | return AMDGPU::SI_SPILL_V64_SAVE; |
1366 | case 12: |
1367 | return AMDGPU::SI_SPILL_V96_SAVE; |
1368 | case 16: |
1369 | return AMDGPU::SI_SPILL_V128_SAVE; |
1370 | case 20: |
1371 | return AMDGPU::SI_SPILL_V160_SAVE; |
1372 | case 24: |
1373 | return AMDGPU::SI_SPILL_V192_SAVE; |
1374 | case 28: |
1375 | return AMDGPU::SI_SPILL_V224_SAVE; |
1376 | case 32: |
1377 | return AMDGPU::SI_SPILL_V256_SAVE; |
1378 | case 64: |
1379 | return AMDGPU::SI_SPILL_V512_SAVE; |
1380 | case 128: |
1381 | return AMDGPU::SI_SPILL_V1024_SAVE; |
1382 | default: |
1383 | llvm_unreachable("unknown register size"); |
1384 | } |
1385 | } |
1386 | |
1387 | static unsigned getAGPRSpillSaveOpcode(unsigned Size) { |
1388 | switch (Size) { |
1389 | case 4: |
1390 | return AMDGPU::SI_SPILL_A32_SAVE; |
1391 | case 8: |
1392 | return AMDGPU::SI_SPILL_A64_SAVE; |
1393 | case 12: |
1394 | return AMDGPU::SI_SPILL_A96_SAVE; |
1395 | case 16: |
1396 | return AMDGPU::SI_SPILL_A128_SAVE; |
1397 | case 20: |
1398 | return AMDGPU::SI_SPILL_A160_SAVE; |
1399 | case 24: |
1400 | return AMDGPU::SI_SPILL_A192_SAVE; |
1401 | case 28: |
1402 | return AMDGPU::SI_SPILL_A224_SAVE; |
1403 | case 32: |
1404 | return AMDGPU::SI_SPILL_A256_SAVE; |
1405 | case 64: |
1406 | return AMDGPU::SI_SPILL_A512_SAVE; |
1407 | case 128: |
1408 | return AMDGPU::SI_SPILL_A1024_SAVE; |
1409 | default: |
1410 | llvm_unreachable("unknown register size"); |
1411 | } |
1412 | } |
1413 | |
1414 | void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
1415 | MachineBasicBlock::iterator MI, |
1416 | Register SrcReg, bool isKill, |
1417 | int FrameIndex, |
1418 | const TargetRegisterClass *RC, |
1419 | const TargetRegisterInfo *TRI) const { |
1420 | MachineFunction *MF = MBB.getParent(); |
1421 | SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); |
1422 | MachineFrameInfo &FrameInfo = MF->getFrameInfo(); |
1423 | const DebugLoc &DL = MBB.findDebugLoc(MI); |
1424 | |
1425 | MachinePointerInfo PtrInfo |
1426 | = MachinePointerInfo::getFixedStack(*MF, FrameIndex); |
1427 | MachineMemOperand *MMO = MF->getMachineMemOperand( |
1428 | PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FrameIndex), |
1429 | FrameInfo.getObjectAlign(FrameIndex)); |
1430 | unsigned SpillSize = TRI->getSpillSize(*RC); |
1431 | |
1432 | if (RI.isSGPRClass(RC)) { |
1433 | MFI->setHasSpilledSGPRs(); |
1434 | assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled"); |
1435 | assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI && |
1436 | SrcReg != AMDGPU::EXEC && "exec should not be spilled"); |
1437 | |
1438 | |
1439 | |
1440 | const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); |
1441 | |
1442 | |
1443 | |
1444 | if (SrcReg.isVirtual() && SpillSize == 4) { |
1445 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
1446 | MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); |
1447 | } |
1448 | |
1449 | BuildMI(MBB, MI, DL, OpDesc) |
1450 | .addReg(SrcReg, getKillRegState(isKill)) |
1451 | .addFrameIndex(FrameIndex) |
1452 | .addMemOperand(MMO) |
1453 | .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); |
1454 | |
1455 | if (RI.spillSGPRToVGPR()) |
1456 | FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); |
1457 | return; |
1458 | } |
1459 | |
1460 | unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize) |
1461 | : getVGPRSpillSaveOpcode(SpillSize); |
1462 | MFI->setHasSpilledVGPRs(); |
1463 | |
1464 | BuildMI(MBB, MI, DL, get(Opcode)) |
1465 | .addReg(SrcReg, getKillRegState(isKill)) |
1466 | .addFrameIndex(FrameIndex) |
1467 | .addReg(MFI->getStackPtrOffsetReg()) |
1468 | .addImm(0) |
1469 | .addMemOperand(MMO); |
1470 | } |
1471 | |
1472 | static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { |
1473 | switch (Size) { |
1474 | case 4: |
1475 | return AMDGPU::SI_SPILL_S32_RESTORE; |
1476 | case 8: |
1477 | return AMDGPU::SI_SPILL_S64_RESTORE; |
1478 | case 12: |
1479 | return AMDGPU::SI_SPILL_S96_RESTORE; |
1480 | case 16: |
1481 | return AMDGPU::SI_SPILL_S128_RESTORE; |
1482 | case 20: |
1483 | return AMDGPU::SI_SPILL_S160_RESTORE; |
1484 | case 24: |
1485 | return AMDGPU::SI_SPILL_S192_RESTORE; |
1486 | case 28: |
1487 | return AMDGPU::SI_SPILL_S224_RESTORE; |
1488 | case 32: |
1489 | return AMDGPU::SI_SPILL_S256_RESTORE; |
1490 | case 64: |
1491 | return AMDGPU::SI_SPILL_S512_RESTORE; |
1492 | case 128: |
1493 | return AMDGPU::SI_SPILL_S1024_RESTORE; |
1494 | default: |
1495 | llvm_unreachable("unknown register size"); |
1496 | } |
1497 | } |
1498 | |
1499 | static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { |
1500 | switch (Size) { |
1501 | case 4: |
1502 | return AMDGPU::SI_SPILL_V32_RESTORE; |
1503 | case 8: |
1504 | return AMDGPU::SI_SPILL_V64_RESTORE; |
1505 | case 12: |
1506 | return AMDGPU::SI_SPILL_V96_RESTORE; |
1507 | case 16: |
1508 | return AMDGPU::SI_SPILL_V128_RESTORE; |
1509 | case 20: |
1510 | return AMDGPU::SI_SPILL_V160_RESTORE; |
1511 | case 24: |
1512 | return AMDGPU::SI_SPILL_V192_RESTORE; |
1513 | case 28: |
1514 | return AMDGPU::SI_SPILL_V224_RESTORE; |
1515 | case 32: |
1516 | return AMDGPU::SI_SPILL_V256_RESTORE; |
1517 | case 64: |
1518 | return AMDGPU::SI_SPILL_V512_RESTORE; |
1519 | case 128: |
1520 | return AMDGPU::SI_SPILL_V1024_RESTORE; |
1521 | default: |
1522 | llvm_unreachable("unknown register size"); |
1523 | } |
1524 | } |
1525 | |
1526 | static unsigned getAGPRSpillRestoreOpcode(unsigned Size) { |
1527 | switch (Size) { |
1528 | case 4: |
1529 | return AMDGPU::SI_SPILL_A32_RESTORE; |
1530 | case 8: |
1531 | return AMDGPU::SI_SPILL_A64_RESTORE; |
1532 | case 12: |
1533 | return AMDGPU::SI_SPILL_A96_RESTORE; |
1534 | case 16: |
1535 | return AMDGPU::SI_SPILL_A128_RESTORE; |
1536 | case 20: |
1537 | return AMDGPU::SI_SPILL_A160_RESTORE; |
1538 | case 24: |
1539 | return AMDGPU::SI_SPILL_A192_RESTORE; |
1540 | case 28: |
1541 | return AMDGPU::SI_SPILL_A224_RESTORE; |
1542 | case 32: |
1543 | return AMDGPU::SI_SPILL_A256_RESTORE; |
1544 | case 64: |
1545 | return AMDGPU::SI_SPILL_A512_RESTORE; |
1546 | case 128: |
1547 | return AMDGPU::SI_SPILL_A1024_RESTORE; |
1548 | default: |
1549 | llvm_unreachable("unknown register size"); |
1550 | } |
1551 | } |
1552 | |
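// loadRegFromStackSlot mirrors storeRegToStackSlot, selecting the matching
// SI_SPILL_*_RESTORE pseudo for the spill size and register class.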
1553 | void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
1554 | MachineBasicBlock::iterator MI, |
1555 | Register DestReg, int FrameIndex, |
1556 | const TargetRegisterClass *RC, |
1557 | const TargetRegisterInfo *TRI) const { |
1558 | MachineFunction *MF = MBB.getParent(); |
1559 | SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); |
1560 | MachineFrameInfo &FrameInfo = MF->getFrameInfo(); |
1561 | const DebugLoc &DL = MBB.findDebugLoc(MI); |
1562 | unsigned SpillSize = TRI->getSpillSize(*RC); |
1563 | |
1564 | MachinePointerInfo PtrInfo |
1565 | = MachinePointerInfo::getFixedStack(*MF, FrameIndex); |
1566 | |
1567 | MachineMemOperand *MMO = MF->getMachineMemOperand( |
1568 | PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FrameIndex), |
1569 | FrameInfo.getObjectAlign(FrameIndex)); |
1570 | |
1571 | if (RI.isSGPRClass(RC)) { |
1572 | MFI->setHasSpilledSGPRs(); |
1573 | assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into"); |
1574 | assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI && |
1575 | DestReg != AMDGPU::EXEC && "exec should not be spilled"); |
1576 | |
1577 | |
1578 | |
1579 | const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); |
1580 | if (DestReg.isVirtual() && SpillSize == 4) { |
1581 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
1582 | MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); |
1583 | } |
1584 | |
1585 | if (RI.spillSGPRToVGPR()) |
1586 | FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); |
1587 | BuildMI(MBB, MI, DL, OpDesc, DestReg) |
1588 | .addFrameIndex(FrameIndex) |
1589 | .addMemOperand(MMO) |
1590 | .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); |
1591 | |
1592 | return; |
1593 | } |
1594 | |
1595 | unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize) |
1596 | : getVGPRSpillRestoreOpcode(SpillSize); |
1597 | BuildMI(MBB, MI, DL, get(Opcode), DestReg) |
1598 | .addFrameIndex(FrameIndex) |
1599 | .addReg(MFI->getStackPtrOffsetReg()) |
1600 | .addImm(0) |
1601 | .addMemOperand(MMO); |
1602 | } |
1603 | |
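// S_NOP waits imm+1 cycles and its field covers at most 8, so insertNoops
// emits one S_NOP per group of up to eight requested wait states.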
1604 | void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, |
1605 | MachineBasicBlock::iterator MI) const { |
1606 | insertNoops(MBB, MI, 1); |
1607 | } |
1608 | |
1609 | void SIInstrInfo::insertNoops(MachineBasicBlock &MBB, |
1610 | MachineBasicBlock::iterator MI, |
1611 | unsigned Quantity) const { |
1612 | DebugLoc DL = MBB.findDebugLoc(MI); |
1613 | while (Quantity > 0) { |
1614 | unsigned Arg = std::min(Quantity, 8u); |
1615 | Quantity -= Arg; |
1616 | BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg - 1); |
1617 | } |
1618 | } |
1619 | |
1620 | void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { |
1621 | auto MF = MBB.getParent(); |
1622 | SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); |
1623 | |
1624 | assert(Info->isEntryFunction()); |
1625 | |
1626 | if (MBB.succ_empty()) { |
1627 | bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); |
1628 | if (HasNoTerminator) { |
1629 | if (Info->returnsVoid()) { |
1630 | BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0); |
1631 | } else { |
1632 | BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG)); |
1633 | } |
1634 | } |
1635 | } |
1636 | } |
1637 | |
1638 | unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) { |
1639 | switch (MI.getOpcode()) { |
1640 | default: return 1; |
1641 | |
1642 | case AMDGPU::S_NOP: |
1643 | return MI.getOperand(0).getImm() + 1; |
1644 | } |
1645 | } |
1646 | |
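// expandPostRAPseudo lowers the target's post-RA pseudos: the S_*_term
// terminator variants become their ordinary scalar opcodes, 64-bit moves and
// indirect register read/write pseudos are expanded into real instructions,
// and the strict WWM/WQM markers are turned into exec-mask updates.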
1647 | bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1648 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
1649 | MachineBasicBlock &MBB = *MI.getParent(); |
1650 | DebugLoc DL = MBB.findDebugLoc(MI); |
1651 | switch (MI.getOpcode()) { |
1652 | default: return TargetInstrInfo::expandPostRAPseudo(MI); |
1653 | case AMDGPU::S_MOV_B64_term: |
1654 | |
1655 | |
1656 | MI.setDesc(get(AMDGPU::S_MOV_B64)); |
1657 | break; |
1658 | |
1659 | case AMDGPU::S_MOV_B32_term: |
1660 | |
1661 | |
1662 | MI.setDesc(get(AMDGPU::S_MOV_B32)); |
1663 | break; |
1664 | |
1665 | case AMDGPU::S_XOR_B64_term: |
1666 | |
1667 | |
1668 | MI.setDesc(get(AMDGPU::S_XOR_B64)); |
1669 | break; |
1670 | |
1671 | case AMDGPU::S_XOR_B32_term: |
1672 | |
1673 | |
1674 | MI.setDesc(get(AMDGPU::S_XOR_B32)); |
1675 | break; |
1676 | case AMDGPU::S_OR_B64_term: |
1677 | |
1678 | |
1679 | MI.setDesc(get(AMDGPU::S_OR_B64)); |
1680 | break; |
1681 | case AMDGPU::S_OR_B32_term: |
1682 | |
1683 | |
1684 | MI.setDesc(get(AMDGPU::S_OR_B32)); |
1685 | break; |
1686 | |
1687 | case AMDGPU::S_ANDN2_B64_term: |
1688 | |
1689 | |
1690 | MI.setDesc(get(AMDGPU::S_ANDN2_B64)); |
1691 | break; |
1692 | |
1693 | case AMDGPU::S_ANDN2_B32_term: |
1694 | |
1695 | |
1696 | MI.setDesc(get(AMDGPU::S_ANDN2_B32)); |
1697 | break; |
1698 | |
1699 | case AMDGPU::S_AND_B64_term: |
1700 | |
1701 | |
1702 | MI.setDesc(get(AMDGPU::S_AND_B64)); |
1703 | break; |
1704 | |
1705 | case AMDGPU::S_AND_B32_term: |
1706 | |
1707 | |
1708 | MI.setDesc(get(AMDGPU::S_AND_B32)); |
1709 | break; |
1710 | |
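  // A 64-bit move is split into two V_MOV_B32s (one per half) unless the
  // subtarget has packed FP32 ops and the value can be emitted as a single
  // V_PK_MOV_B32.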
1711 | case AMDGPU::V_MOV_B64_PSEUDO: { |
1712 | Register Dst = MI.getOperand(0).getReg(); |
1713 | Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); |
1714 | Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); |
1715 | |
1716 | const MachineOperand &SrcOp = MI.getOperand(1); |
1717 | |
1718 | assert(!SrcOp.isFPImm()); |
1719 | if (SrcOp.isImm()) { |
1720 | APInt Imm(64, SrcOp.getImm()); |
1721 | APInt Lo(32, Imm.getLoBits(32).getZExtValue()); |
1722 | APInt Hi(32, Imm.getHiBits(32).getZExtValue()); |
1723 | if (ST.hasPackedFP32Ops() && Lo == Hi && isInlineConstant(Lo)) { |
1724 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) |
1725 | .addImm(SISrcMods::OP_SEL_1) |
1726 | .addImm(Lo.getSExtValue()) |
1727 | .addImm(SISrcMods::OP_SEL_1) |
1728 | .addImm(Lo.getSExtValue()) |
1729 | .addImm(0) |
1730 | .addImm(0) |
1731 | .addImm(0) |
1732 | .addImm(0) |
1733 | .addImm(0); |
1734 | } else { |
1735 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) |
1736 | .addImm(Lo.getSExtValue()) |
1737 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1738 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) |
1739 | .addImm(Hi.getSExtValue()) |
1740 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1741 | } |
1742 | } else { |
1743 | assert(SrcOp.isReg()); |
1744 | if (ST.hasPackedFP32Ops() && |
1745 | !RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) { |
1746 | BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) |
1747 | .addImm(SISrcMods::OP_SEL_1) |
1748 | .addReg(SrcOp.getReg()) |
1749 | .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) |
1750 | .addReg(SrcOp.getReg()) |
1751 | .addImm(0) |
1752 | .addImm(0) |
1753 | .addImm(0) |
1754 | .addImm(0) |
1755 | .addImm(0); |
1756 | } else { |
1757 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) |
1758 | .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) |
1759 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1760 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) |
1761 | .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) |
1762 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1763 | } |
1764 | } |
1765 | MI.eraseFromParent(); |
1766 | break; |
1767 | } |
1768 | case AMDGPU::V_MOV_B64_DPP_PSEUDO: { |
1769 | expandMovDPP64(MI); |
1770 | break; |
1771 | } |
1772 | case AMDGPU::S_MOV_B64_IMM_PSEUDO: { |
1773 | const MachineOperand &SrcOp = MI.getOperand(1); |
1774 | assert(!SrcOp.isFPImm()); |
1775 | APInt Imm(64, SrcOp.getImm()); |
1776 | if (Imm.isIntN(32) || isInlineConstant(Imm)) { |
1777 | MI.setDesc(get(AMDGPU::S_MOV_B64)); |
1778 | break; |
1779 | } |
1780 | |
1781 | Register Dst = MI.getOperand(0).getReg(); |
1782 | Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); |
1783 | Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); |
1784 | |
1785 | APInt Lo(32, Imm.getLoBits(32).getZExtValue()); |
1786 | APInt Hi(32, Imm.getHiBits(32).getZExtValue()); |
1787 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstLo) |
1788 | .addImm(Lo.getSExtValue()) |
1789 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1790 | BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstHi) |
1791 | .addImm(Hi.getSExtValue()) |
1792 | .addReg(Dst, RegState::Implicit | RegState::Define); |
1793 | MI.eraseFromParent(); |
1794 | break; |
1795 | } |
1796 | case AMDGPU::V_SET_INACTIVE_B32: { |
1797 | unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; |
1798 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
1799 | auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); |
1800 | FirstNot->addRegisterDead(AMDGPU::SCC, TRI); |
1801 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) |
1802 | .add(MI.getOperand(2)); |
1803 | BuildMI(MBB, MI, DL, get(NotOpc), Exec) |
1804 | .addReg(Exec); |
1805 | MI.eraseFromParent(); |
1806 | break; |
1807 | } |
1808 | case AMDGPU::V_SET_INACTIVE_B64: { |
1809 | unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; |
1810 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
1811 | auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); |
1812 | FirstNot->addRegisterDead(AMDGPU::SCC, TRI); |
1813 | MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), |
1814 | MI.getOperand(0).getReg()) |
1815 | .add(MI.getOperand(2)); |
1816 | expandPostRAPseudo(*Copy); |
1817 | BuildMI(MBB, MI, DL, get(NotOpc), Exec) |
1818 | .addReg(Exec); |
1819 | MI.eraseFromParent(); |
1820 | break; |
1821 | } |
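  // Indirect register writes via MOVREL: the selected element is written with
  // V_MOVRELD_B32 or S_MOVRELD_B32/B64, and the full vector register is added
  // as a tied implicit def/use so it stays live across the write.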
1822 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1: |
1823 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2: |
1824 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3: |
1825 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4: |
1826 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5: |
1827 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8: |
1828 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16: |
1829 | case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32: |
1830 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1: |
1831 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2: |
1832 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3: |
1833 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4: |
1834 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5: |
1835 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8: |
1836 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16: |
1837 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32: |
1838 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1: |
1839 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2: |
1840 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4: |
1841 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8: |
1842 | case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: { |
1843 | const TargetRegisterClass *EltRC = getOpRegClass(MI, 2); |
1844 | |
1845 | unsigned Opc; |
1846 | if (RI.hasVGPRs(EltRC)) { |
1847 | Opc = AMDGPU::V_MOVRELD_B32_e32; |
1848 | } else { |
1849 | Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64 |
1850 | : AMDGPU::S_MOVRELD_B32; |
1851 | } |
1852 | |
1853 | const MCInstrDesc &OpDesc = get(Opc); |
1854 | Register VecReg = MI.getOperand(0).getReg(); |
1855 | bool IsUndef = MI.getOperand(1).isUndef(); |
1856 | unsigned SubReg = MI.getOperand(3).getImm(); |
1857 | assert(VecReg == MI.getOperand(1).getReg()); |
1858 | |
1859 | MachineInstrBuilder MIB = |
1860 | BuildMI(MBB, MI, DL, OpDesc) |
1861 | .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) |
1862 | .add(MI.getOperand(2)) |
1863 | .addReg(VecReg, RegState::ImplicitDefine) |
1864 | .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)); |
1865 | |
1866 | const int ImpDefIdx = |
1867 | OpDesc.getNumOperands() + OpDesc.getNumImplicitUses(); |
1868 | const int ImpUseIdx = ImpDefIdx + 1; |
1869 | MIB->tieOperands(ImpDefIdx, ImpUseIdx); |
1870 | MI.eraseFromParent(); |
1871 | break; |
1872 | } |
1873 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1: |
1874 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2: |
1875 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3: |
1876 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4: |
1877 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5: |
1878 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8: |
1879 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16: |
1880 | case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: { |
1881 | assert(ST.useVGPRIndexMode()); |
1882 | Register VecReg = MI.getOperand(0).getReg(); |
1883 | bool IsUndef = MI.getOperand(1).isUndef(); |
1884 | Register Idx = MI.getOperand(3).getReg(); |
1885 | unsigned SubReg = MI.getOperand(4).getImm(); |
1886 | |
1887 | MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) |
1888 | .addReg(Idx) |
1889 | .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE); |
1890 | SetOn->getOperand(3).setIsUndef(); |
1891 | |
1892 | const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect); |
1893 | MachineInstrBuilder MIB = |
1894 | BuildMI(MBB, MI, DL, OpDesc) |
1895 | .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) |
1896 | .add(MI.getOperand(2)) |
1897 | .addReg(VecReg, RegState::ImplicitDefine) |
1898 | .addReg(VecReg, |
1899 | RegState::Implicit | (IsUndef ? RegState::Undef : 0)); |
1900 | |
1901 | const int ImpDefIdx = OpDesc.getNumOperands() + OpDesc.getNumImplicitUses(); |
1902 | const int ImpUseIdx = ImpDefIdx + 1; |
1903 | MIB->tieOperands(ImpDefIdx, ImpUseIdx); |
1904 | |
1905 | MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); |
1906 | |
1907 | finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); |
1908 | |
1909 | MI.eraseFromParent(); |
1910 | break; |
1911 | } |
1912 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1: |
1913 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2: |
1914 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3: |
1915 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4: |
1916 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5: |
1917 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8: |
1918 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16: |
1919 | case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: { |
1920 | assert(ST.useVGPRIndexMode()); |
1921 | Register Dst = MI.getOperand(0).getReg(); |
1922 | Register VecReg = MI.getOperand(1).getReg(); |
1923 | bool IsUndef = MI.getOperand(1).isUndef(); |
1924 | Register Idx = MI.getOperand(2).getReg(); |
1925 | unsigned SubReg = MI.getOperand(3).getImm(); |
1926 | |
1927 | MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) |
1928 | .addReg(Idx) |
1929 | .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE); |
1930 | SetOn->getOperand(3).setIsUndef(); |
1931 | |
1932 | BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32)) |
1933 | .addDef(Dst) |
1934 | .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) |
1935 | .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)) |
1936 | .addReg(AMDGPU::M0, RegState::Implicit); |
1937 | |
1938 | MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); |
1939 | |
1940 | finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); |
1941 | |
1942 | MI.eraseFromParent(); |
1943 | break; |
1944 | } |
1945 | case AMDGPU::SI_PC_ADD_REL_OFFSET: { |
1946 | MachineFunction &MF = *MBB.getParent(); |
1947 | Register Reg = MI.getOperand(0).getReg(); |
1948 | Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0); |
1949 | Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1); |
1950 | |
1951 | |
1952 | |
1953 | MIBundleBuilder Bundler(MBB, MI); |
1954 | Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); |
1955 | |
1956 | |
1957 | |
1958 | Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) |
1959 | .addReg(RegLo) |
1960 | .add(MI.getOperand(1))); |
1961 | |
1962 | MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) |
1963 | .addReg(RegHi); |
1964 | MIB.add(MI.getOperand(2)); |
1965 | |
1966 | Bundler.append(MIB); |
1967 | finalizeBundle(MBB, Bundler.begin()); |
1968 | |
1969 | MI.eraseFromParent(); |
1970 | break; |
1971 | } |
1972 | case AMDGPU::ENTER_STRICT_WWM: { |
1973 | |
1974 | |
1975 | MI.setDesc(get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 |
1976 | : AMDGPU::S_OR_SAVEEXEC_B64)); |
1977 | break; |
1978 | } |
1979 | case AMDGPU::ENTER_STRICT_WQM: { |
1980 | |
1981 | |
1982 | const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
1983 | const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64; |
1984 | const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
1985 | BuildMI(MBB, MI, DL, get(MovOp), MI.getOperand(0).getReg()).addReg(Exec); |
1986 | BuildMI(MBB, MI, DL, get(WQMOp), Exec).addReg(Exec); |
1987 | |
1988 | MI.eraseFromParent(); |
1989 | break; |
1990 | } |
1991 | case AMDGPU::EXIT_STRICT_WWM: |
1992 | case AMDGPU::EXIT_STRICT_WQM: { |
1993 | |
1994 | |
1995 | MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64)); |
1996 | break; |
1997 | } |
1998 | } |
1999 | return true; |
2000 | } |
2001 | |
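// expandMovDPP64 splits V_MOV_B64_DPP_PSEUDO into two V_MOV_B32_dpp
// instructions, one per 32-bit half, and recombines the halves with a
// REG_SEQUENCE when the destination is a virtual register.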
2002 | std::pair<MachineInstr*, MachineInstr*> |
2003 | SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { |
2004 | assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); |
2005 | |
2006 | MachineBasicBlock &MBB = *MI.getParent(); |
2007 | DebugLoc DL = MBB.findDebugLoc(MI); |
2008 | MachineFunction *MF = MBB.getParent(); |
2009 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
2010 | Register Dst = MI.getOperand(0).getReg(); |
2011 | unsigned Part = 0; |
2012 | MachineInstr *Split[2]; |
2013 | |
2014 | for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) { |
2015 | auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp)); |
2016 | if (Dst.isPhysical()) { |
2017 | MovDPP.addDef(RI.getSubReg(Dst, Sub)); |
2018 | } else { |
2019 | assert(MRI.isSSA()); |
2020 | auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
2021 | MovDPP.addDef(Tmp); |
2022 | } |
2023 | |
2024 | for (unsigned I = 1; I <= 2; ++I) { |
2025 | const MachineOperand &SrcOp = MI.getOperand(I); |
2026 | assert(!SrcOp.isFPImm()); |
2027 | if (SrcOp.isImm()) { |
2028 | APInt Imm(64, SrcOp.getImm()); |
2029 | Imm.ashrInPlace(Part * 32); |
2030 | MovDPP.addImm(Imm.getLoBits(32).getZExtValue()); |
2031 | } else { |
2032 | assert(SrcOp.isReg()); |
2033 | Register Src = SrcOp.getReg(); |
2034 | if (Src.isPhysical()) |
2035 | MovDPP.addReg(RI.getSubReg(Src, Sub)); |
2036 | else |
2037 | MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub); |
2038 | } |
2039 | } |
2040 | |
2041 | for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I) |
2042 | MovDPP.addImm(MI.getOperand(I).getImm()); |
2043 | |
2044 | Split[Part] = MovDPP; |
2045 | ++Part; |
2046 | } |
2047 | |
2048 | if (Dst.isVirtual()) |
2049 | BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst) |
2050 | .addReg(Split[0]->getOperand(0).getReg()) |
2051 | .addImm(AMDGPU::sub0) |
2052 | .addReg(Split[1]->getOperand(0).getReg()) |
2053 | .addImm(AMDGPU::sub1); |
2054 | |
2055 | MI.eraseFromParent(); |
2056 | return std::make_pair(Split[0], Split[1]); |
2057 | } |
2058 | |
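// Commutation support: swapSourceModifiers exchanges the src0/src1 modifier
// immediates, swapRegAndNonRegOperand swaps a register operand with an
// immediate, frame-index or global operand in place, and
// commuteInstructionImpl uses both when commuting src0 and src1.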
2059 | bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, |
2060 | MachineOperand &Src0, |
2061 | unsigned Src0OpName, |
2062 | MachineOperand &Src1, |
2063 | unsigned Src1OpName) const { |
2064 | MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName); |
2065 | if (!Src0Mods) |
2066 | return false; |
2067 | |
2068 | MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName); |
2069 | assert(Src1Mods && |
2070 | "All commutable instructions have both src0 and src1 modifiers"); |
2071 | |
2072 | int Src0ModsVal = Src0Mods->getImm(); |
2073 | int Src1ModsVal = Src1Mods->getImm(); |
2074 | |
2075 | Src1Mods->setImm(Src0ModsVal); |
2076 | Src0Mods->setImm(Src1ModsVal); |
2077 | return true; |
2078 | } |
2079 | |
2080 | static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, |
2081 | MachineOperand &RegOp, |
2082 | MachineOperand &NonRegOp) { |
2083 | Register Reg = RegOp.getReg(); |
2084 | unsigned SubReg = RegOp.getSubReg(); |
2085 | bool IsKill = RegOp.isKill(); |
2086 | bool IsDead = RegOp.isDead(); |
2087 | bool IsUndef = RegOp.isUndef(); |
2088 | bool IsDebug = RegOp.isDebug(); |
2089 | |
2090 | if (NonRegOp.isImm()) |
2091 | RegOp.ChangeToImmediate(NonRegOp.getImm()); |
2092 | else if (NonRegOp.isFI()) |
2093 | RegOp.ChangeToFrameIndex(NonRegOp.getIndex()); |
2094 | else if (NonRegOp.isGlobal()) { |
2095 | RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(), |
2096 | NonRegOp.getTargetFlags()); |
2097 | } else |
2098 | return nullptr; |
2099 | |
2100 | |
2101 | RegOp.setTargetFlags(NonRegOp.getTargetFlags()); |
2102 | |
2103 | NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug); |
2104 | NonRegOp.setSubReg(SubReg); |
2105 | |
2106 | return &MI; |
2107 | } |
2108 | |
2109 | MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, |
2110 | unsigned Src0Idx, |
2111 | unsigned Src1Idx) const { |
2112 | assert(!NewMI && "this should never be used"); |
2113 | |
2114 | unsigned Opc = MI.getOpcode(); |
2115 | int CommutedOpcode = commuteOpcode(Opc); |
2116 | if (CommutedOpcode == -1) |
2117 | return nullptr; |
2118 | |
2119 | assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) == |
2120 | static_cast<int>(Src0Idx) && |
2121 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) == |
2122 | static_cast<int>(Src1Idx) && |
2123 | "inconsistency with findCommutedOpIndices"); |
2124 | |
2125 | MachineOperand &Src0 = MI.getOperand(Src0Idx); |
2126 | MachineOperand &Src1 = MI.getOperand(Src1Idx); |
2127 | |
2128 | MachineInstr *CommutedMI = nullptr; |
2129 | if (Src0.isReg() && Src1.isReg()) { |
2130 | if (isOperandLegal(MI, Src1Idx, &Src0)) { |
2131 | |
2132 | CommutedMI |
2133 | = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); |
2134 | } |
2135 | |
2136 | } else if (Src0.isReg() && !Src1.isReg()) { |
2137 | |
2138 | |
2139 | CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); |
2140 | } else if (!Src0.isReg() && Src1.isReg()) { |
2141 | if (isOperandLegal(MI, Src1Idx, &Src0)) |
2142 | CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); |
2143 | } else { |
2144 | |
2145 | return nullptr; |
2146 | } |
2147 | |
2148 | if (CommutedMI) { |
2149 | swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, |
2150 | Src1, AMDGPU::OpName::src1_modifiers); |
2151 | |
2152 | CommutedMI->setDesc(get(CommutedOpcode)); |
2153 | } |
2154 | |
2155 | return CommutedMI; |
2156 | } |
2157 | |
2158 | |
2159 | |
2160 | |
2161 | bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI, |
2162 | unsigned &SrcOpIdx0, |
2163 | unsigned &SrcOpIdx1) const { |
2164 | return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1); |
2165 | } |
2166 | |
2167 | bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0, |
2168 | unsigned &SrcOpIdx1) const { |
2169 | if (!Desc.isCommutable()) |
2170 | return false; |
2171 | |
2172 | unsigned Opc = Desc.getOpcode(); |
2173 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); |
2174 | if (Src0Idx == -1) |
2175 | return false; |
2176 | |
2177 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); |
2178 | if (Src1Idx == -1) |
2179 | return false; |
2180 | |
2181 | return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); |
2182 | } |
2183 | |
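// Branch offsets are in bytes, while the hardware SIMM16 field counts dwords
// relative to the instruction after the branch, hence the divide by 4 and the
// -1 adjustment below.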
2184 | bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp, |
2185 | int64_t BrOffset) const { |
2186 | |
2187 | |
2188 | assert(BranchOp != AMDGPU::S_SETPC_B64); |
2189 | |
2190 | |
2191 | BrOffset /= 4; |
2192 | |
2193 | |
2194 | |
2195 | BrOffset -= 1; |
2196 | |
2197 | return isIntN(BranchOffsetBits, BrOffset); |
2198 | } |
2199 | |
2200 | MachineBasicBlock *SIInstrInfo::getBranchDestBlock( |
2201 | const MachineInstr &MI) const { |
2202 | if (MI.getOpcode() == AMDGPU::S_SETPC_B64) { |
2203 | |
2204 | |
2205 | return nullptr; |
2206 | } |
2207 | |
2208 | return MI.getOperand(0).getMBB(); |
2209 | } |
2210 | |
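// insertIndirectBranch expands an out-of-range unconditional branch: it reads
// the PC with S_GETPC_B64, adds a 64-bit offset to the destination block with
// S_ADD_U32 / S_ADDC_U32 through lo/hi temporary symbols, and jumps with
// S_SETPC_B64.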
2211 | unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, |
2212 | MachineBasicBlock &DestBB, |
2213 | const DebugLoc &DL, |
2214 | int64_t BrOffset, |
2215 | RegScavenger *RS) const { |
2216 | assert(RS && "RegScavenger required for long branching"); |
2217 | assert(MBB.empty() && |
2218 | "new block should be inserted for expanding unconditional branch"); |
2219 | assert(MBB.pred_size() == 1); |
2220 | |
2221 | MachineFunction *MF = MBB.getParent(); |
2222 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
2223 | |
2224 | |
2225 | |
2226 | Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
2227 | |
2228 | auto I = MBB.end(); |
2229 | |
2230 | |
2231 | |
2232 | MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg); |
2233 | |
2234 | auto &MCCtx = MF->getContext(); |
2235 | MCSymbol *PostGetPCLabel = |
2236 | MCCtx.createTempSymbol("post_getpc", true); |
2237 | GetPC->setPostInstrSymbol(*MF, PostGetPCLabel); |
2238 | |
2239 | MCSymbol *OffsetLo = |
2240 | MCCtx.createTempSymbol("offset_lo", true); |
2241 | MCSymbol *OffsetHi = |
2242 | MCCtx.createTempSymbol("offset_hi", true); |
2243 | BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32)) |
2244 | .addReg(PCReg, RegState::Define, AMDGPU::sub0) |
2245 | .addReg(PCReg, 0, AMDGPU::sub0) |
2246 | .addSym(OffsetLo, MO_FAR_BRANCH_OFFSET); |
2247 | BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32)) |
2248 | .addReg(PCReg, RegState::Define, AMDGPU::sub1) |
2249 | .addReg(PCReg, 0, AMDGPU::sub1) |
2250 | .addSym(OffsetHi, MO_FAR_BRANCH_OFFSET); |
2251 | |
2252 | |
2253 | BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) |
2254 | .addReg(PCReg); |
2255 | |
2256 | auto ComputeBlockSize = [](const TargetInstrInfo *TII, |
2257 | const MachineBasicBlock &MBB) { |
2258 | unsigned Size = 0; |
2259 | for (const MachineInstr &MI : MBB) |
2260 | Size += TII->getInstSizeInBytes(MI); |
2261 | return Size; |
2262 | }; |
2263 | |
// Scavenge an unused SGPR pair for the computed branch target: the virtual
// PCReg defined above is replaced with the scavenged register, and the lo/hi
// offset symbols are then resolved as (DestBB - post_getpc).
2300 | RS->enterBasicBlockEnd(MBB); |
2301 | Register Scav = RS->scavengeRegisterBackwards( |
2302 | AMDGPU::SReg_64RegClass, |
2303 | MachineBasicBlock::iterator(GetPC), false, 0); |
2304 | MRI.replaceRegWith(PCReg, Scav); |
2305 | MRI.clearVirtRegs(); |
2306 | RS->setRegUsed(Scav); |
2307 | |
2308 | |
2309 | auto *Offset = MCBinaryExpr::createSub( |
2310 | MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx), |
2311 | MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx); |
2312 | |
2313 | auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx); |
2314 | OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); |
2315 | auto *ShAmt = MCConstantExpr::create(32, MCCtx); |
2316 | OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); |
2317 | return ComputeBlockSize(this, MBB); |
2318 | } |
2319 | |
2320 | unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { |
2321 | switch (Cond) { |
2322 | case SIInstrInfo::SCC_TRUE: |
2323 | return AMDGPU::S_CBRANCH_SCC1; |
2324 | case SIInstrInfo::SCC_FALSE: |
2325 | return AMDGPU::S_CBRANCH_SCC0; |
2326 | case SIInstrInfo::VCCNZ: |
2327 | return AMDGPU::S_CBRANCH_VCCNZ; |
2328 | case SIInstrInfo::VCCZ: |
2329 | return AMDGPU::S_CBRANCH_VCCZ; |
2330 | case SIInstrInfo::EXECNZ: |
2331 | return AMDGPU::S_CBRANCH_EXECNZ; |
2332 | case SIInstrInfo::EXECZ: |
2333 | return AMDGPU::S_CBRANCH_EXECZ; |
2334 | default: |
2335 | llvm_unreachable("invalid branch predicate"); |
2336 | } |
2337 | } |
2338 | |
2339 | SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { |
2340 | switch (Opcode) { |
2341 | case AMDGPU::S_CBRANCH_SCC0: |
2342 | return SCC_FALSE; |
2343 | case AMDGPU::S_CBRANCH_SCC1: |
2344 | return SCC_TRUE; |
2345 | case AMDGPU::S_CBRANCH_VCCNZ: |
2346 | return VCCNZ; |
2347 | case AMDGPU::S_CBRANCH_VCCZ: |
2348 | return VCCZ; |
2349 | case AMDGPU::S_CBRANCH_EXECNZ: |
2350 | return EXECNZ; |
2351 | case AMDGPU::S_CBRANCH_EXECZ: |
2352 | return EXECZ; |
2353 | default: |
2354 | return INVALID_BR; |
2355 | } |
2356 | } |
2357 | |
2358 | bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, |
2359 | MachineBasicBlock::iterator I, |
2360 | MachineBasicBlock *&TBB, |
2361 | MachineBasicBlock *&FBB, |
2362 | SmallVectorImpl<MachineOperand> &Cond, |
2363 | bool AllowModify) const { |
2364 | if (I->getOpcode() == AMDGPU::S_BRANCH) { |
2365 | |
2366 | TBB = I->getOperand(0).getMBB(); |
2367 | return false; |
2368 | } |
2369 | |
2370 | MachineBasicBlock *CondBB = nullptr; |
2371 | |
2372 | if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { |
2373 | CondBB = I->getOperand(1).getMBB(); |
2374 | Cond.push_back(I->getOperand(0)); |
2375 | } else { |
2376 | BranchPredicate Pred = getBranchPredicate(I->getOpcode()); |
2377 | if (Pred == INVALID_BR) |
2378 | return true; |
2379 | |
2380 | CondBB = I->getOperand(0).getMBB(); |
2381 | Cond.push_back(MachineOperand::CreateImm(Pred)); |
2382 | Cond.push_back(I->getOperand(1)); |
2383 | } |
2384 | ++I; |
2385 | |
2386 | if (I == MBB.end()) { |
2387 | |
2388 | TBB = CondBB; |
2389 | return false; |
2390 | } |
2391 | |
2392 | if (I->getOpcode() == AMDGPU::S_BRANCH) { |
2393 | TBB = CondBB; |
2394 | FBB = I->getOperand(0).getMBB(); |
2395 | return false; |
2396 | } |
2397 | |
2398 | return true; |
2399 | } |
2400 | |
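// analyzeBranch first skips the S_*_term exec-manipulation terminators; SI_IF,
// SI_ELSE and the kill terminators cannot be analyzed, and any other
// non-branch terminator is unexpected here.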
2401 | bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, |
2402 | MachineBasicBlock *&FBB, |
2403 | SmallVectorImpl<MachineOperand> &Cond, |
2404 | bool AllowModify) const { |
2405 | MachineBasicBlock::iterator I = MBB.getFirstTerminator(); |
2406 | auto E = MBB.end(); |
2407 | if (I == E) |
2408 | return false; |
2409 | |
2410 | |
2411 | |
2412 | while (I != E && !I->isBranch() && !I->isReturn()) { |
2413 | switch (I->getOpcode()) { |
2414 | case AMDGPU::S_MOV_B64_term: |
2415 | case AMDGPU::S_XOR_B64_term: |
2416 | case AMDGPU::S_OR_B64_term: |
2417 | case AMDGPU::S_ANDN2_B64_term: |
2418 | case AMDGPU::S_AND_B64_term: |
2419 | case AMDGPU::S_MOV_B32_term: |
2420 | case AMDGPU::S_XOR_B32_term: |
2421 | case AMDGPU::S_OR_B32_term: |
2422 | case AMDGPU::S_ANDN2_B32_term: |
2423 | case AMDGPU::S_AND_B32_term: |
2424 | break; |
2425 | case AMDGPU::SI_IF: |
2426 | case AMDGPU::SI_ELSE: |
2427 | case AMDGPU::SI_KILL_I1_TERMINATOR: |
2428 | case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: |
2429 | |
2430 | return true; |
2431 | default: |
2432 | llvm_unreachable("unexpected non-branch terminator inst"); |
2433 | } |
2434 | |
2435 | ++I; |
2436 | } |
2437 | |
2438 | if (I == E) |
2439 | return false; |
2440 | |
2441 | return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); |
2442 | } |
2443 | |
2444 | unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, |
2445 | int *BytesRemoved) const { |
2446 | MachineBasicBlock::iterator I = MBB.getFirstTerminator(); |
2447 | |
2448 | unsigned Count = 0; |
2449 | unsigned RemovedSize = 0; |
2450 | while (I != MBB.end()) { |
2451 | MachineBasicBlock::iterator Next = std::next(I); |
2452 | RemovedSize += getInstSizeInBytes(*I); |
2453 | I->eraseFromParent(); |
2454 | ++Count; |
2455 | I = Next; |
2456 | } |
2457 | |
2458 | if (BytesRemoved) |
2459 | *BytesRemoved = RemovedSize; |
2460 | |
2461 | return Count; |
2462 | } |
2463 | |
2464 | |
2465 | static void preserveCondRegFlags(MachineOperand &CondReg, |
2466 | const MachineOperand &OrigCond) { |
2467 | CondReg.setIsUndef(OrigCond.isUndef()); |
2468 | CondReg.setIsKill(OrigCond.isKill()); |
2469 | } |
2470 | |
2471 | unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, |
2472 | MachineBasicBlock *TBB, |
2473 | MachineBasicBlock *FBB, |
2474 | ArrayRef<MachineOperand> Cond, |
2475 | const DebugLoc &DL, |
2476 | int *BytesAdded) const { |
2477 | if (!FBB && Cond.empty()) { |
2478 | BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) |
2479 | .addMBB(TBB); |
2480 | if (BytesAdded) |
2481 | *BytesAdded = ST.hasOffset3fBug() ? 8 : 4; |
2482 | return 1; |
2483 | } |
2484 | |
2485 | if (Cond.size() == 1 && Cond[0].isReg()) { |
2486 | BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) |
2487 | .add(Cond[0]) |
2488 | .addMBB(TBB); |
2489 | return 1; |
2490 | } |
2491 | |
2492 | assert(TBB && Cond[0].isImm()); |
2493 | |
2494 | unsigned Opcode |
2495 | = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); |
2496 | |
2497 | if (!FBB) { |
2498 | Cond[1].isUndef(); |
2499 | MachineInstr *CondBr = |
2500 | BuildMI(&MBB, DL, get(Opcode)) |
2501 | .addMBB(TBB); |
2502 | |
2503 | |
2504 | preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); |
2505 | fixImplicitOperands(*CondBr); |
2506 | |
2507 | if (BytesAdded) |
2508 | *BytesAdded = ST.hasOffset3fBug() ? 8 : 4; |
2509 | return 1; |
2510 | } |
2511 | |
2512 | assert(TBB && FBB); |
2513 | |
2514 | MachineInstr *CondBr = |
2515 | BuildMI(&MBB, DL, get(Opcode)) |
2516 | .addMBB(TBB); |
2517 | fixImplicitOperands(*CondBr); |
2518 | BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) |
2519 | .addMBB(FBB); |
2520 | |
2521 | MachineOperand &CondReg = CondBr->getOperand(1); |
2522 | CondReg.setIsUndef(Cond[1].isUndef()); |
2523 | CondReg.setIsKill(Cond[1].isKill()); |
2524 | |
2525 | if (BytesAdded) |
2526 | *BytesAdded = ST.hasOffset3fBug() ? 16 : 8; |
2527 | |
2528 | return 2; |
2529 | } |
2530 | |
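// Branch predicates are encoded as positive/negative pairs (see the negation
// in insertSelect below), so reversing a condition just negates the
// immediate.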
2531 | bool SIInstrInfo::reverseBranchCondition( |
2532 | SmallVectorImpl<MachineOperand> &Cond) const { |
2533 | if (Cond.size() != 2) { |
2534 | return true; |
2535 | } |
2536 | |
2537 | if (Cond[0].isImm()) { |
2538 | Cond[0].setImm(-Cond[0].getImm()); |
2539 | return false; |
2540 | } |
2541 | |
2542 | return true; |
2543 | } |
2544 | |
2545 | bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, |
2546 | ArrayRef<MachineOperand> Cond, |
2547 | Register DstReg, Register TrueReg, |
2548 | Register FalseReg, int &CondCycles, |
2549 | int &TrueCycles, int &FalseCycles) const { |
2550 | switch (Cond[0].getImm()) { |
2551 | case VCCNZ: |
2552 | case VCCZ: { |
2553 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
2554 | const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); |
2555 | if (MRI.getRegClass(FalseReg) != RC) |
2556 | return false; |
2557 | |
2558 | int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; |
2559 | CondCycles = TrueCycles = FalseCycles = NumInsts; |
2560 | |
2561 | |
2562 | return RI.hasVGPRs(RC) && NumInsts <= 6; |
2563 | } |
2564 | case SCC_TRUE: |
2565 | case SCC_FALSE: { |
2566 | |
2567 | |
2568 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
2569 | const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); |
2570 | if (MRI.getRegClass(FalseReg) != RC) |
2571 | return false; |
2572 | |
2573 | int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32; |
2574 | |
2575 | |
2576 | if (NumInsts % 2 == 0) |
2577 | NumInsts /= 2; |
2578 | |
2579 | CondCycles = TrueCycles = FalseCycles = NumInsts; |
2580 | return RI.isSGPRClass(RC); |
2581 | } |
2582 | default: |
2583 | return false; |
2584 | } |
2585 | } |
2586 | |
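// insertSelect emits V_CNDMASK_B32 for VCC-based predicates or S_CSELECT for
// SCC-based ones; wider results are built 32 or 64 bits at a time and glued
// back together with a REG_SEQUENCE.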
2587 | void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, |
2588 | MachineBasicBlock::iterator I, const DebugLoc &DL, |
2589 | Register DstReg, ArrayRef<MachineOperand> Cond, |
2590 | Register TrueReg, Register FalseReg) const { |
2591 | BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm()); |
2592 | if (Pred == VCCZ || Pred == SCC_FALSE) { |
2593 | Pred = static_cast<BranchPredicate>(-Pred); |
2594 | std::swap(TrueReg, FalseReg); |
2595 | } |
2596 | |
2597 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
2598 | const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); |
2599 | unsigned DstSize = RI.getRegSizeInBits(*DstRC); |
2600 | |
2601 | if (DstSize == 32) { |
2602 | MachineInstr *Select; |
2603 | if (Pred == SCC_TRUE) { |
2604 | Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg) |
2605 | .addReg(TrueReg) |
2606 | .addReg(FalseReg); |
2607 | } else { |
2608 | |
2609 | Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg) |
2610 | .addReg(FalseReg) |
2611 | .addReg(TrueReg); |
2612 | } |
2613 | |
2614 | preserveCondRegFlags(Select->getOperand(3), Cond[1]); |
2615 | return; |
2616 | } |
2617 | |
2618 | if (DstSize == 64 && Pred == SCC_TRUE) { |
2619 | MachineInstr *Select = |
2620 | BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) |
2621 | .addReg(TrueReg) |
2622 | .addReg(FalseReg); |
2623 | |
2624 | preserveCondRegFlags(Select->getOperand(3), Cond[1]); |
2625 | return; |
2626 | } |
2627 | |
2628 | static const int16_t Sub0_15[] = { |
2629 | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, |
2630 | AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, |
2631 | AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, |
2632 | AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, |
2633 | }; |
2634 | |
2635 | static const int16_t Sub0_15_64[] = { |
2636 | AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, |
2637 | AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, |
2638 | AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, |
2639 | AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, |
2640 | }; |
2641 | |
2642 | unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; |
2643 | const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; |
2644 | const int16_t *SubIndices = Sub0_15; |
2645 | int NElts = DstSize / 32; |
2646 | |
2647 | |
2648 | |
2649 | if (Pred == SCC_TRUE) { |
2650 | if (NElts % 2) { |
2651 | SelOp = AMDGPU::S_CSELECT_B32; |
2652 | EltRC = &AMDGPU::SGPR_32RegClass; |
2653 | } else { |
2654 | SelOp = AMDGPU::S_CSELECT_B64; |
2655 | EltRC = &AMDGPU::SGPR_64RegClass; |
2656 | SubIndices = Sub0_15_64; |
2657 | NElts /= 2; |
2658 | } |
2659 | } |
2660 | |
2661 | MachineInstrBuilder MIB = BuildMI( |
2662 | MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg); |
2663 | |
2664 | I = MIB->getIterator(); |
2665 | |
2666 | SmallVector<Register, 8> Regs; |
2667 | for (int Idx = 0; Idx != NElts; ++Idx) { |
2668 | Register DstElt = MRI.createVirtualRegister(EltRC); |
2669 | Regs.push_back(DstElt); |
2670 | |
2671 | unsigned SubIdx = SubIndices[Idx]; |
2672 | |
2673 | MachineInstr *Select; |
2674 | if (SelOp == AMDGPU::V_CNDMASK_B32_e32) { |
2675 | Select = |
2676 | BuildMI(MBB, I, DL, get(SelOp), DstElt) |
2677 | .addReg(FalseReg, 0, SubIdx) |
2678 | .addReg(TrueReg, 0, SubIdx); |
2679 | } else { |
2680 | Select = |
2681 | BuildMI(MBB, I, DL, get(SelOp), DstElt) |
2682 | .addReg(TrueReg, 0, SubIdx) |
2683 | .addReg(FalseReg, 0, SubIdx); |
2684 | } |
2685 | |
2686 | preserveCondRegFlags(Select->getOperand(3), Cond[1]); |
2687 | fixImplicitOperands(*Select); |
2688 | |
2689 | MIB.addReg(DstElt) |
2690 | .addImm(SubIdx); |
2691 | } |
2692 | } |
2693 | |
2694 | bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const { |
2695 | switch (MI.getOpcode()) { |
2696 | case AMDGPU::V_MOV_B32_e32: |
2697 | case AMDGPU::V_MOV_B32_e64: |
2698 | case AMDGPU::V_MOV_B64_PSEUDO: { |
2699 | |
2700 | |
2701 | unsigned NumOps = MI.getDesc().getNumOperands() + |
2702 | MI.getDesc().getNumImplicitUses(); |
2703 | |
2704 | return MI.getNumOperands() == NumOps; |
2705 | } |
2706 | case AMDGPU::S_MOV_B32: |
2707 | case AMDGPU::S_MOV_B64: |
2708 | case AMDGPU::COPY: |
2709 | case AMDGPU::V_ACCVGPR_WRITE_B32_e64: |
2710 | case AMDGPU::V_ACCVGPR_READ_B32_e64: |
2711 | case AMDGPU::V_ACCVGPR_MOV_B32: |
2712 | return true; |
2713 | default: |
2714 | return false; |
2715 | } |
2716 | } |
2717 | |
2718 | unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind( |
2719 | unsigned Kind) const { |
2720 | switch(Kind) { |
2721 | case PseudoSourceValue::Stack: |
2722 | case PseudoSourceValue::FixedStack: |
2723 | return AMDGPUAS::PRIVATE_ADDRESS; |
2724 | case PseudoSourceValue::ConstantPool: |
2725 | case PseudoSourceValue::GOT: |
2726 | case PseudoSourceValue::JumpTable: |
2727 | case PseudoSourceValue::GlobalValueCallEntry: |
2728 | case PseudoSourceValue::ExternalSymbolCallEntry: |
2729 | case PseudoSourceValue::TargetCustom: |
2730 | return AMDGPUAS::CONSTANT_ADDRESS; |
2731 | } |
2732 | return AMDGPUAS::FLAT_ADDRESS; |
2733 | } |
2734 | |
2735 | static void removeModOperands(MachineInstr &MI) { |
2736 | unsigned Opc = MI.getOpcode(); |
2737 | int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, |
2738 | AMDGPU::OpName::src0_modifiers); |
2739 | int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, |
2740 | AMDGPU::OpName::src1_modifiers); |
2741 | int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, |
2742 | AMDGPU::OpName::src2_modifiers); |
2743 | |
2744 | MI.RemoveOperand(Src2ModIdx); |
2745 | MI.RemoveOperand(Src1ModIdx); |
2746 | MI.RemoveOperand(Src0ModIdx); |
2747 | } |
2748 | |
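// FoldImmediate folds a move-immediate DefMI into its single non-debug use: a
// COPY becomes the corresponding mov (or V_ACCVGPR_WRITE_B32_e64 for AGPR
// destinations), and MAD/MAC/FMA/FMAC users are rewritten to the
// MADMK/MADAK (or FMAMK/FMAAK) forms with the constant folded in.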
2749 | bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
2750 | Register Reg, MachineRegisterInfo *MRI) const { |
2751 | if (!MRI->hasOneNonDBGUse(Reg)) |
2752 | return false; |
2753 | |
2754 | switch (DefMI.getOpcode()) { |
2755 | default: |
2756 | return false; |
2757 | case AMDGPU::S_MOV_B64: |
2758 | |
2759 | |
2760 | return false; |
2761 | |
2762 | case AMDGPU::V_MOV_B32_e32: |
2763 | case AMDGPU::S_MOV_B32: |
2764 | case AMDGPU::V_ACCVGPR_WRITE_B32_e64: |
2765 | break; |
2766 | } |
2767 | |
2768 | const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0); |
2769 | assert(ImmOp); |
2770 | |
2771 | if (!ImmOp->isImm()) |
2772 | return false; |
2773 | |
2774 | unsigned Opc = UseMI.getOpcode(); |
2775 | if (Opc == AMDGPU::COPY) { |
2776 | Register DstReg = UseMI.getOperand(0).getReg(); |
2777 | bool Is16Bit = getOpSize(UseMI, 0) == 2; |
2778 | bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); |
2779 | unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; |
2780 | APInt Imm(32, ImmOp->getImm()); |
2781 | |
2782 | if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16) |
2783 | Imm = Imm.ashr(16); |
2784 | |
2785 | if (RI.isAGPR(*MRI, DstReg)) { |
2786 | if (!isInlineConstant(Imm)) |
2787 | return false; |
2788 | NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64; |
2789 | } |
2790 | |
2791 | if (Is16Bit) { |
2792 | if (isVGPRCopy) |
2793 | return false; |
2794 | |
2795 | if (DstReg.isVirtual() && |
2796 | UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) |
2797 | return false; |
2798 | |
2799 | UseMI.getOperand(0).setSubReg(0); |
2800 | if (DstReg.isPhysical()) { |
2801 | DstReg = RI.get32BitRegister(DstReg); |
2802 | UseMI.getOperand(0).setReg(DstReg); |
2803 | } |
2804 | assert(UseMI.getOperand(1).getReg().isVirtual()); |
2805 | } |
2806 | |
2807 | UseMI.setDesc(get(NewOpc)); |
2808 | UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); |
2809 | UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); |
2810 | return true; |
2811 | } |
2812 | |
2813 | if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || |
2814 | Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 || |
2815 | Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || |
2816 | Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) { |
2817 | |
2818 | |
2819 | if (hasAnyModifiersSet(UseMI)) |
2820 | return false; |
2821 | |
2822 | |
2823 | |
2824 | |
2825 | MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); |
2826 | |
2827 | |
2828 | if (isInlineConstant(UseMI, *Src0, *ImmOp)) |
2829 | return false; |
2830 | |
2831 | bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || |
2832 | Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64; |
2833 | bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || |
2834 | Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64; |
2835 | MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); |
2836 | MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); |
2837 | |
2838 | |
2839 | |
2840 | if (Src0->isReg() && Src0->getReg() == Reg) { |
2841 | if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) |
2842 | return false; |
2843 | |
2844 | if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) |
2845 | return false; |
2846 | |
2847 | unsigned NewOpc = |
2848 | IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16) |
2849 | : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); |
2850 | if (pseudoToMCOpcode(NewOpc) == -1) |
2851 | return false; |
2852 | |
2853 | |
2854 | |
2855 | const int64_t Imm = ImmOp->getImm(); |
2856 | |
2857 | |
2858 | |
2859 | |
2860 | |
2861 | UseMI.RemoveOperand( |
2862 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); |
2863 | UseMI.RemoveOperand( |
2864 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); |
2865 | |
2866 | Register Src1Reg = Src1->getReg(); |
2867 | unsigned Src1SubReg = Src1->getSubReg(); |
2868 | Src0->setReg(Src1Reg); |
2869 | Src0->setSubReg(Src1SubReg); |
2870 | Src0->setIsKill(Src1->isKill()); |
2871 | |
2872 | if (Opc == AMDGPU::V_MAC_F32_e64 || |
2873 | Opc == AMDGPU::V_MAC_F16_e64 || |
2874 | Opc == AMDGPU::V_FMAC_F32_e64 || |
2875 | Opc == AMDGPU::V_FMAC_F16_e64) |
2876 | UseMI.untieRegOperand( |
2877 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); |
2878 | |
2879 | Src1->ChangeToImmediate(Imm); |
2880 | |
2881 | removeModOperands(UseMI); |
2882 | UseMI.setDesc(get(NewOpc)); |
2883 | |
2884 | bool DeleteDef = MRI->hasOneNonDBGUse(Reg); |
2885 | if (DeleteDef) |
2886 | DefMI.eraseFromParent(); |
2887 | |
2888 | return true; |
2889 | } |
2890 | |
2891 | |
2892 | if (Src2->isReg() && Src2->getReg() == Reg) { |
2893 | |
2894 | |
2895 | bool Src0Inlined = false; |
2896 | if (Src0->isReg()) { |
2897 | |
2898 | |
2899 | |
2900 | MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg()); |
2901 | if (Def && Def->isMoveImmediate() && |
2902 | isInlineConstant(Def->getOperand(1)) && |
2903 | MRI->hasOneUse(Src0->getReg())) { |
2904 | Src0->ChangeToImmediate(Def->getOperand(1).getImm()); |
2905 | Src0Inlined = true; |
2906 | } else if ((Src0->getReg().isPhysical() && |
2907 | (ST.getConstantBusLimit(Opc) <= 1 && |
2908 | RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) || |
2909 | (Src0->getReg().isVirtual() && |
2910 | (ST.getConstantBusLimit(Opc) <= 1 && |
2911 | RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))) |
2912 | return false; |
2913 | |
2914 | } |
2915 | |
2916 | if (Src1->isReg() && !Src0Inlined ) { |
2917 | |
2918 | MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg()); |
2919 | if (Def && Def->isMoveImmediate() && |
2920 | isInlineConstant(Def->getOperand(1)) && |
2921 | MRI->hasOneUse(Src1->getReg()) && |
2922 | commuteInstruction(UseMI)) { |
2923 | Src0->ChangeToImmediate(Def->getOperand(1).getImm()); |
2924 | } else if ((Src1->getReg().isPhysical() && |
2925 | RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) || |
2926 | (Src1->getReg().isVirtual() && |
2927 | RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) |
2928 | return false; |
2929 | |
2930 | } |
2931 | |
2932 | unsigned NewOpc = |
2933 | IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16) |
2934 | : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); |
2935 | if (pseudoToMCOpcode(NewOpc) == -1) |
2936 | return false; |
2937 | |
2938 | const int64_t Imm = ImmOp->getImm(); |
2939 | |
2940 | |
2941 | |
2942 | |
2943 | |
2944 | UseMI.RemoveOperand( |
2945 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); |
2946 | UseMI.RemoveOperand( |
2947 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); |
2948 | |
2949 | if (Opc == AMDGPU::V_MAC_F32_e64 || |
2950 | Opc == AMDGPU::V_MAC_F16_e64 || |
2951 | Opc == AMDGPU::V_FMAC_F32_e64 || |
2952 | Opc == AMDGPU::V_FMAC_F16_e64) |
2953 | UseMI.untieRegOperand( |
2954 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); |
2955 | |
2956 | |
2957 | Src2->ChangeToImmediate(Imm); |
2958 | |
2959 | |
2960 | removeModOperands(UseMI); |
2961 | UseMI.setDesc(get(NewOpc)); |
2962 | |
2963 | |
2964 | |
2965 | legalizeOperands(UseMI); |
2966 | |
2967 | bool DeleteDef = MRI->hasOneNonDBGUse(Reg); |
2968 | if (DeleteDef) |
2969 | DefMI.eraseFromParent(); |
2970 | |
2971 | return true; |
2972 | } |
2973 | } |
2974 | |
2975 | return false; |
2976 | } |
2977 | |
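// Helpers for areMemAccessesTriviallyDisjoint: accesses with identical base
// operands are disjoint when their [offset, offset+width) ranges do not
// overlap; otherwise disjointness is decided conservatively by instruction
// kind (DS, MUBUF/MTBUF, SMRD, FLAT).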
2978 | static bool |
2979 | memOpsHaveSameBaseOperands(ArrayRef<const MachineOperand *> BaseOps1, |
2980 | ArrayRef<const MachineOperand *> BaseOps2) { |
2981 | if (BaseOps1.size() != BaseOps2.size()) |
2982 | return false; |
2983 | for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) { |
2984 | if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I])) |
2985 | return false; |
2986 | } |
2987 | return true; |
2988 | } |
2989 | |
2990 | static bool offsetsDoNotOverlap(int WidthA, int OffsetA, |
2991 | int WidthB, int OffsetB) { |
2992 | int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; |
2993 | int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; |
2994 | int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; |
2995 | return LowOffset + LowWidth <= HighOffset; |
2996 | } |
2997 | |
2998 | bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, |
2999 | const MachineInstr &MIb) const { |
3000 | SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1; |
3001 | int64_t Offset0, Offset1; |
3002 | unsigned Dummy0, Dummy1; |
3003 | bool Offset0IsScalable, Offset1IsScalable; |
3004 | if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable, |
3005 | Dummy0, &RI) || |
3006 | !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable, |
3007 | Dummy1, &RI)) |
3008 | return false; |
3009 | |
3010 | if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1)) |
3011 | return false; |
3012 | |
3013 | if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { |
3014 | |
3015 | return false; |
3016 | } |
3017 | unsigned Width0 = MIa.memoperands().front()->getSize(); |
3018 | unsigned Width1 = MIb.memoperands().front()->getSize(); |
3019 | return offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1); |
3020 | } |
3021 | |
3022 | bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, |
3023 | const MachineInstr &MIb) const { |
3024 | assert(MIa.mayLoadOrStore() && |
3025 | "MIa must load from or modify a memory location"); |
3026 | assert(MIb.mayLoadOrStore() && |
3027 | "MIb must load from or modify a memory location"); |
3028 | |
3029 | if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) |
3030 | return false; |
3031 | |
3032 | |
3033 | if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) |
3034 | return false; |
3035 | |
3036 | |
3037 | |
3038 | |
3039 | |
3040 | |
3041 | if (isDS(MIa)) { |
3042 | if (isDS(MIb)) |
3043 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3044 | |
3045 | return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb); |
3046 | } |
3047 | |
3048 | if (isMUBUF(MIa) || isMTBUF(MIa)) { |
3049 | if (isMUBUF(MIb) || isMTBUF(MIb)) |
3050 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3051 | |
3052 | return !isFLAT(MIb) && !isSMRD(MIb); |
3053 | } |
3054 | |
3055 | if (isSMRD(MIa)) { |
3056 | if (isSMRD(MIb)) |
3057 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3058 | |
3059 | return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb); |
3060 | } |
3061 | |
3062 | if (isFLAT(MIa)) { |
3063 | if (isFLAT(MIb)) |
3064 | return checkInstOffsetsDoNotOverlap(MIa, MIb); |
3065 | |
3066 | return false; |
3067 | } |
3068 | |
3069 | return false; |
3070 | } |
3071 | |
3072 | static int64_t getFoldableImm(const MachineOperand* MO) { |
3073 | if (!MO->isReg()) |
3074 | return 0; // Not a register operand, so there is no immediate to fold. |
3075 | const MachineFunction *MF = MO->getParent()->getParent()->getParent(); |
3076 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
3077 | auto Def = MRI.getUniqueVRegDef(MO->getReg()); |
3078 | if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 && |
3079 | Def->getOperand(1).isImm()) |
3080 | return Def->getOperand(1).getImm(); |
3081 | return 0; // Callers treat 0 as "no foldable immediate". |
3082 | } |
3083 | |
3084 | static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, |
3085 | MachineInstr &NewMI) { |
3086 | if (LV) { |
3087 | unsigned NumOps = MI.getNumOperands(); |
3088 | for (unsigned I = 1; I < NumOps; ++I) { |
3089 | MachineOperand &Op = MI.getOperand(I); |
3090 | if (Op.isReg() && Op.isKill()) |
3091 | LV->replaceKillInstruction(Op.getReg(), MI, NewMI); |
3092 | } |
3093 | } |
3094 | } |
3095 | |
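// convertToThreeAddress turns the two-address MAC/FMAC forms into MAD/FMA, or
// into the MADAK/MADMK (FMAAK/FMAMK) variants when one source is defined by a
// foldable V_MOV_B32 immediate.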
3096 | MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, |
3097 | MachineInstr &MI, |
3098 | LiveVariables *LV) const { |
3099 | unsigned Opc = MI.getOpcode(); |
3100 | bool IsF16 = false; |
3101 | bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || |
3102 | Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || |
3103 | Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; |
3104 | bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; |
3105 | |
3106 | switch (Opc) { |
3107 | default: |
3108 | return nullptr; |
3109 | case AMDGPU::V_MAC_F16_e64: |
3110 | case AMDGPU::V_FMAC_F16_e64: |
3111 | IsF16 = true; |
3112 | LLVM_FALLTHROUGH; |
3113 | case AMDGPU::V_MAC_F32_e64: |
3114 | case AMDGPU::V_FMAC_F32_e64: |
3115 | case AMDGPU::V_FMAC_F64_e64: |
3116 | break; |
3117 | case AMDGPU::V_MAC_F16_e32: |
3118 | case AMDGPU::V_FMAC_F16_e32: |
3119 | IsF16 = true; |
3120 | LLVM_FALLTHROUGH; |
3121 | case AMDGPU::V_MAC_F32_e32: |
3122 | case AMDGPU::V_FMAC_F32_e32: |
3123 | case AMDGPU::V_FMAC_F64_e32: { |
3124 | int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), |
3125 | AMDGPU::OpName::src0); |
3126 | const MachineOperand *Src0 = &MI.getOperand(Src0Idx); |
3127 | if (!Src0->isReg() && !Src0->isImm()) |
3128 | return nullptr; |
3129 | |
3130 | if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) |
3131 | return nullptr; |
3132 | |
3133 | break; |
3134 | } |
3135 | } |
3136 | |
3137 | const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); |
3138 | const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); |
3139 | const MachineOperand *Src0Mods = |
3140 | getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); |
3141 | const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); |
3142 | const MachineOperand *Src1Mods = |
3143 | getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); |
3144 | const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); |
3145 | const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); |
3146 | const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); |
3147 | MachineInstrBuilder MIB; |
3148 | |
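| // With no source modifiers, clamp or omod (and not f64), try the literal |
| // forms first: fold an immediate src2 into *AK, or an immediate src1/src0 |
| // into *MK, when the constant bus allows it and the target encodes the op. |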
3149 | if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 && |
3150 | |
3151 | (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || |
3152 | !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) { |
3153 | if (auto Imm = getFoldableImm(Src2)) { |
3154 | unsigned NewOpc = |
3155 | IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32) |
3156 | : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); |
3157 | if (pseudoToMCOpcode(NewOpc) != -1) { |
3158 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3159 | .add(*Dst) |
3160 | .add(*Src0) |
3161 | .add(*Src1) |
3162 | .addImm(Imm); |
3163 | updateLiveVariables(LV, MI, *MIB); |
3164 | return MIB; |
3165 | } |
3166 | } |
3167 | unsigned NewOpc = IsFMA |
3168 | ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32) |
3169 | : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); |
3170 | if (auto Imm = getFoldableImm(Src1)) { |
3171 | if (pseudoToMCOpcode(NewOpc) != -1) { |
3172 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3173 | .add(*Dst) |
3174 | .add(*Src0) |
3175 | .addImm(Imm) |
3176 | .add(*Src2); |
3177 | updateLiveVariables(LV, MI, *MIB); |
3178 | return MIB; |
3179 | } |
3180 | } |
3181 | if (auto Imm = getFoldableImm(Src0)) { |
3182 | if (pseudoToMCOpcode(NewOpc) != -1 && |
3183 | isOperandLegal( |
3184 | MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0), |
3185 | Src1)) { |
3186 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3187 | .add(*Dst) |
3188 | .add(*Src1) |
3189 | .addImm(Imm) |
3190 | .add(*Src2); |
3191 | updateLiveVariables(LV, MI, *MIB); |
3192 | return MIB; |
3193 | } |
3194 | } |
3195 | } |
3196 | |
3197 | unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_e64 |
3198 | : IsF64 ? AMDGPU::V_FMA_F64_e64 |
3199 | : AMDGPU::V_FMA_F32_e64) |
3200 | : (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64); |
3201 | if (pseudoToMCOpcode(NewOpc) == -1) |
3202 | return nullptr; |
3203 | |
3204 | MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) |
3205 | .add(*Dst) |
3206 | .addImm(Src0Mods ? Src0Mods->getImm() : 0) |
3207 | .add(*Src0) |
3208 | .addImm(Src1Mods ? Src1Mods->getImm() : 0) |
3209 | .add(*Src1) |
3210 | .addImm(0) |
3211 | .add(*Src2) |
3212 | .addImm(Clamp ? Clamp->getImm() : 0) |
3213 | .addImm(Omod ? Omod->getImm() : 0); |
3214 | updateLiveVariables(LV, MI, *MIB); |
3215 | return MIB; |
3216 | } |
3217 | |
3218 | |
3219 | |
3220 | |
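| // The S_SET_GPR_IDX_* instructions change the VGPR indexing mode and must |
| // not be reordered with instructions that depend on it. |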
3221 | static bool changesVGPRIndexingMode(const MachineInstr &MI) { |
3222 | switch (MI.getOpcode()) { |
3223 | case AMDGPU::S_SET_GPR_IDX_ON: |
3224 | case AMDGPU::S_SET_GPR_IDX_MODE: |
3225 | case AMDGPU::S_SET_GPR_IDX_OFF: |
3226 | return true; |
3227 | default: |
3228 | return false; |
3229 | } |
3230 | } |
3231 | |
3232 | bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
3233 | const MachineBasicBlock *MBB, |
3234 | const MachineFunction &MF) const { |
3235 | |
3236 | |
3237 | |
3238 | |
3239 | |
3240 | |
3241 | |
3242 | |
3243 | if (MI.isTerminator() || MI.isPosition()) |
3244 | return true; |
3245 | |
3246 | |
3247 | if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) |
3248 | return true; |
3249 | |
3250 | |
3251 | |
3252 | |
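| // Writes to EXEC, S_SETREG, and VGPR-indexing changes alter the lane mask or |
| // hardware mode state, so treat them as scheduling boundaries as well. |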
3253 | return MI.modifiesRegister(AMDGPU::EXEC, &RI) || |
3254 | MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || |
3255 | MI.getOpcode() == AMDGPU::S_SETREG_B32 || |
3256 | changesVGPRIndexingMode(MI); |
3257 | } |
3258 | |
3259 | bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const { |
3260 | return Opcode == AMDGPU::DS_ORDERED_COUNT || |
3261 | Opcode == AMDGPU::DS_GWS_INIT || |
3262 | Opcode == AMDGPU::DS_GWS_SEMA_V || |
3263 | Opcode == AMDGPU::DS_GWS_SEMA_BR || |
3264 | Opcode == AMDGPU::DS_GWS_SEMA_P || |
3265 | Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL || |
3266 | Opcode == AMDGPU::DS_GWS_BARRIER; |
3267 | } |
3268 | |
3269 | bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) { |
3270 | |
3271 | |
3272 | |
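| // MODE only shows up as a static implicit def on the few instructions that |
| // touch it, so scanning the descriptor's implicit-def list is sufficient. |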
3273 | if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) { |
3274 | for (; ImpDef && *ImpDef; ++ImpDef) { |
3275 | if (*ImpDef == AMDGPU::MODE) |
3276 | return true; |
3277 | } |
3278 | } |
3279 | |
3280 | return false; |
3281 | } |
3282 | |
3283 | bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { |
3284 | unsigned Opcode = MI.getOpcode(); |
3285 | |
3286 | if (MI.mayStore() && isSMRD(MI)) |
3287 | return true; |
3288 | |
3289 | |
3290 | if (MI.isReturn()) |
3291 | return true; |
3292 | |
3293 | |
3294 | |
3295 | |
3296 | |
3297 | |
3298 | |
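| // Message sends, exports, traps and GWS operations have externally visible |
| // effects even when every lane is disabled. |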
3299 | if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || |
3300 | isEXP(Opcode) || |
3301 | Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP || |
3302 | Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER) |
3303 | return true; |
3304 | |
3305 | if (MI.isCall() || MI.isInlineAsm()) |
3306 | return true; |
3307 | |
3308 | |
3309 | if (modifiesModeRegister(MI)) |
3310 | return true; |
3311 | |
3312 | |
3313 | |
3314 | |
3315 | |
3316 | |
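| // Lane-access instructions read or write lane registers regardless of the |
| // EXEC mask. |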
3317 | if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || |
3318 | Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32) |
3319 | return true; |
3320 | |
3321 | return false; |
3322 | } |
3323 | |
3324 | bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI, |
3325 | const MachineInstr &MI) const { |
3326 | if (MI.isMetaInstruction()) |
3327 | return false; |
3328 | |
3329 | |
3330 | if (MI.isCopyLike()) { |
3331 | if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg())) |
3332 | return true; |
3333 | |
3334 | |
3335 | return MI.readsRegister(AMDGPU::EXEC, &RI); |
3336 | } |
3337 | |
3338 | |
3339 | if (MI.isCall()) |
3340 | return true; |
3341 | |
3342 | |
3343 | if (!isTargetSpecificOpcode(MI.getOpcode())) |
3344 | return true; |
3345 | |
3346 | return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI); |
3347 | } |
3348 | |
3349 | bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { |
3350 | switch (Imm.getBitWidth()) { |
3351 | case 1: |
3352 | return true; |
3353 | |
3354 | case 32: |
3355 | return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(), |
3356 | ST.hasInv2PiInlineImm()); |
3357 | case 64: |
3358 | return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(), |
3359 | ST.hasInv2PiInlineImm()); |
3360 | case 16: |
3361 | return ST.has16BitInsts() && |
3362 | AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), |
3363 | ST.hasInv2PiInlineImm()); |
3364 | default: |
3365 | llvm_unreachable("invalid bitwidth"); |
3366 | } |
3367 | } |
3368 | |
3369 | bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, |
3370 | uint8_t OperandType) const { |
3371 | if (!MO.isImm() || |
3372 | OperandType < AMDGPU::OPERAND_SRC_FIRST || |
3373 | OperandType > AMDGPU::OPERAND_SRC_LAST) |
3374 | return false; |
3375 | |
3376 | |
3377 | |
3378 | |
3379 | |
3380 | |
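| // MachineOperand only records a 64-bit integer, so the operand type decides |
| // how the value is truncated before the inline-constant range check. |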
3381 | int64_t Imm = MO.getImm(); |
3382 | switch (OperandType) { |
3383 | case AMDGPU::OPERAND_REG_IMM_INT32: |
3384 | case AMDGPU::OPERAND_REG_IMM_FP32: |
3385 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
3386 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
3387 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
3388 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
3389 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
3390 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
3391 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
3392 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: { |
3393 | int32_t Trunc = static_cast<int32_t>(Imm); |
3394 | return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); |
3395 | } |
3396 | case AMDGPU::OPERAND_REG_IMM_INT64: |
3397 | case AMDGPU::OPERAND_REG_IMM_FP64: |
3398 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
3399 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
3400 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
3401 | return AMDGPU::isInlinableLiteral64(MO.getImm(), |
3402 | ST.hasInv2PiInlineImm()); |
3403 | case AMDGPU::OPERAND_REG_IMM_INT16: |
3404 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
3405 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
3406 | |
3407 | |
3408 | |
3409 | |
3410 | |
3411 | |
3412 | |
3413 | |
3414 | |
3415 | |
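| // 16-bit integer operands only accept the plain integer inline range here; |
| // the FP16 cases below handle floating-point bit patterns. |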
3416 | return AMDGPU::isInlinableIntLiteral(Imm); |
3417 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
3418 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
3419 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
3420 | |
3421 | return AMDGPU::isInlinableIntLiteralV216(Imm); |
3422 | case AMDGPU::OPERAND_REG_IMM_FP16: |
3423 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
3424 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: { |
3425 | if (isInt<16>(Imm) || isUInt<16>(Imm)) { |
3426 | |
3427 | |
3428 | |
3429 | |
3430 | int16_t Trunc = static_cast<int16_t>(Imm); |
3431 | return ST.has16BitInsts() && |
3432 | AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); |
3433 | } |
3434 | |
3435 | return false; |
3436 | } |
3437 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
3438 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
3439 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { |
3440 | uint32_t Trunc = static_cast<uint32_t>(Imm); |
3441 | return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); |
3442 | } |
3443 | default: |
3444 | llvm_unreachable("invalid operand type"); |
3445 | } |
3446 | } |
3447 | |
3448 | bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO, |
3449 | const MCOperandInfo &OpInfo) const { |
3450 | switch (MO.getType()) { |
3451 | case MachineOperand::MO_Register: |
3452 | return false; |
3453 | case MachineOperand::MO_Immediate: |
3454 | return !isInlineConstant(MO, OpInfo); |
3455 | case MachineOperand::MO_FrameIndex: |
3456 | case MachineOperand::MO_MachineBasicBlock: |
3457 | case MachineOperand::MO_ExternalSymbol: |
3458 | case MachineOperand::MO_GlobalAddress: |
3459 | case MachineOperand::MO_MCSymbol: |
3460 | return true; |
3461 | default: |
3462 | llvm_unreachable("unexpected operand type"); |
3463 | } |
3464 | } |
3465 | |
3466 | static bool compareMachineOp(const MachineOperand &Op0, |
3467 | const MachineOperand &Op1) { |
3468 | if (Op0.getType() != Op1.getType()) |
3469 | return false; |
3470 | |
3471 | switch (Op0.getType()) { |
3472 | case MachineOperand::MO_Register: |
3473 | return Op0.getReg() == Op1.getReg(); |
3474 | case MachineOperand::MO_Immediate: |
3475 | return Op0.getImm() == Op1.getImm(); |
3476 | default: |
3477 | llvm_unreachable("Didn't expect to be comparing these operand types"); |
3478 | } |
3479 | } |
3480 | |
3481 | bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, |
3482 | const MachineOperand &MO) const { |
3483 | const MCInstrDesc &InstDesc = MI.getDesc(); |
3484 | const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo]; |
3485 | |
3486 | assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); |
3487 | |
3488 | if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) |
3489 | return true; |
3490 | |
3491 | if (OpInfo.RegClass < 0) |
3492 | return false; |
3493 | |
3494 | if (MO.isImm() && isInlineConstant(MO, OpInfo)) { |
3495 | if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() && |
3496 | OpNo == (unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(), |
3497 | AMDGPU::OpName::src2)) |
3498 | return false; |
3499 | return RI.opCanUseInlineConstant(OpInfo.OperandType); |
3500 | } |
3501 | |
3502 | if (!RI.opCanUseLiteralConstant(OpInfo.OperandType)) |
3503 | return false; |
3504 | |
3505 | if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo)) |
3506 | return true; |
3507 | |
3508 | return ST.hasVOP3Literal(); |
3509 | } |
3510 | |
3511 | bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { |
3512 | |
3513 | if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts()) |
3514 | return false; |
3515 | |
3516 | int Op32 = AMDGPU::getVOPe32(Opcode); |
3517 | if (Op32 == -1) |
3518 | return false; |
3519 | |
3520 | return pseudoToMCOpcode(Op32) != -1; |
3521 | } |
3522 | |
3523 | bool SIInstrInfo::hasModifiers(unsigned Opcode) const { |
3524 | |
3525 | |
3526 | |
3527 | return AMDGPU::getNamedOperandIdx(Opcode, |
3528 | AMDGPU::OpName::src0_modifiers) != -1; |
3529 | } |
3530 | |
3531 | bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, |
3532 | unsigned OpName) const { |
3533 | const MachineOperand *Mods = getNamedOperand(MI, OpName); |
3534 | return Mods && Mods->getImm(); |
3535 | } |
3536 | |
3537 | bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const { |
3538 | return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || |
3539 | hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || |
3540 | hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) || |
3541 | hasModifiersSet(MI, AMDGPU::OpName::clamp) || |
3542 | hasModifiersSet(MI, AMDGPU::OpName::omod); |
3543 | } |
3544 | |
3545 | bool SIInstrInfo::canShrink(const MachineInstr &MI, |
3546 | const MachineRegisterInfo &MRI) const { |
3547 | const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); |
3548 | |
3549 | |
3550 | |
3551 | |
3552 | |
3553 | |
3554 | |
3555 | if (Src2) { |
3556 | switch (MI.getOpcode()) { |
3557 | default: return false; |
3558 | |
3559 | case AMDGPU::V_ADDC_U32_e64: |
3560 | case AMDGPU::V_SUBB_U32_e64: |
3561 | case AMDGPU::V_SUBBREV_U32_e64: { |
3562 | const MachineOperand *Src1 |
3563 | = getNamedOperand(MI, AMDGPU::OpName::src1); |
3564 | if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg())) |
3565 | return false; |
3566 | |
3567 | return true; |
3568 | } |
3569 | case AMDGPU::V_MAC_F32_e64: |
3570 | case AMDGPU::V_MAC_F16_e64: |
3571 | case AMDGPU::V_FMAC_F32_e64: |
3572 | case AMDGPU::V_FMAC_F16_e64: |
3573 | case AMDGPU::V_FMAC_F64_e64: |
3574 | if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) || |
3575 | hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) |
3576 | return false; |
3577 | break; |
3578 | |
3579 | case AMDGPU::V_CNDMASK_B32_e64: |
3580 | break; |
3581 | } |
3582 | } |
3583 | |
3584 | const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); |
3585 | if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) || |
3586 | hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))) |
3587 | return false; |
3588 | |
3589 | |
3590 | |
3591 | if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers)) |
3592 | return false; |
3593 | |
3594 | |
3595 | if (!hasVALU32BitEncoding(MI.getOpcode())) |
3596 | return false; |
3597 | |
3598 | |
3599 | return !hasModifiersSet(MI, AMDGPU::OpName::omod) && |
3600 | !hasModifiersSet(MI, AMDGPU::OpName::clamp); |
3601 | } |
3602 | |
3603 | |
3604 | |
3605 | static void copyFlagsToImplicitVCC(MachineInstr &MI, |
3606 | const MachineOperand &Orig) { |
3607 | |
3608 | for (MachineOperand &Use : MI.implicit_operands()) { |
3609 | if (Use.isUse() && |
3610 | (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) { |
3611 | Use.setIsUndef(Orig.isUndef()); |
3612 | Use.setIsKill(Orig.isKill()); |
3613 | return; |
3614 | } |
3615 | } |
3616 | } |
3617 | |
3618 | MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, |
3619 | unsigned Op32) const { |
3620 | MachineBasicBlock *MBB = MI.getParent(); |
3621 | MachineInstrBuilder Inst32 = |
3622 | BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32)) |
3623 | .setMIFlags(MI.getFlags()); |
3624 | |
3625 | |
3626 | |
3627 | int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst); |
3628 | if (Op32DstIdx != -1) { |
3629 | |
3630 | Inst32.add(MI.getOperand(0)); |
3631 | } else { |
3632 | assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) || |
3633 | (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) && |
3634 | "Unexpected case"); |
3635 | } |
3636 | |
3637 | Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0)); |
3638 | |
3639 | const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); |
3640 | if (Src1) |
3641 | Inst32.add(*Src1); |
3642 | |
3643 | const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); |
3644 | |
3645 | if (Src2) { |
3646 | int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); |
3647 | if (Op32Src2Idx != -1) { |
3648 | Inst32.add(*Src2); |
3649 | } else { |
3650 | |
3651 | |
3652 | |
3653 | |
3654 | |
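| // The 32-bit encoding has no explicit src2; the carry-in is the implicit VCC |
| // use, so propagate Src2's undef/kill flags onto that implicit operand. |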
3655 | fixImplicitOperands(*Inst32); |
3656 | copyFlagsToImplicitVCC(*Inst32, *Src2); |
3657 | } |
3658 | } |
3659 | |
3660 | return Inst32; |
3661 | } |
3662 | |
3663 | bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, |
3664 | const MachineOperand &MO, |
3665 | const MCOperandInfo &OpInfo) const { |
3666 | |
3667 | |
3668 | |
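| // Non-inline immediates and SGPR-class reads (including implicit M0/VCC uses) |
| // occupy the constant bus; inline constants and VGPR operands do not. |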
3669 | if (MO.isImm()) |
3670 | return !isInlineConstant(MO, OpInfo); |
3671 | |
3672 | if (!MO.isReg()) |
3673 | return true; |
3674 | |
3675 | if (!MO.isUse()) |
3676 | return false; |
3677 | |
3678 | if (MO.getReg().isVirtual()) |
3679 | return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); |
3680 | |
3681 | |
3682 | if (MO.getReg() == AMDGPU::SGPR_NULL) |
3683 | return false; |
3684 | |
3685 | |
3686 | if (MO.isImplicit()) { |
3687 | return MO.getReg() == AMDGPU::M0 || |
3688 | MO.getReg() == AMDGPU::VCC || |
3689 | MO.getReg() == AMDGPU::VCC_LO; |
3690 | } else { |
3691 | return AMDGPU::SReg_32RegClass.contains(MO.getReg()) || |
3692 | AMDGPU::SReg_64RegClass.contains(MO.getReg()); |
3693 | } |
3694 | } |
3695 | |
3696 | static Register findImplicitSGPRRead(const MachineInstr &MI) { |
3697 | for (const MachineOperand &MO : MI.implicit_operands()) { |
3698 | |
3699 | if (MO.isDef()) |
3700 | continue; |
3701 | |
3702 | switch (MO.getReg()) { |
3703 | case AMDGPU::VCC: |
3704 | case AMDGPU::VCC_LO: |
3705 | case AMDGPU::VCC_HI: |
3706 | case AMDGPU::M0: |
3707 | case AMDGPU::FLAT_SCR: |
3708 | return MO.getReg(); |
3709 | |
3710 | default: |
3711 | break; |
3712 | } |
3713 | } |
3714 | |
3715 | return AMDGPU::NoRegister; |
3716 | } |
3717 | |
3718 | static bool shouldReadExec(const MachineInstr &MI) { |
3719 | if (SIInstrInfo::isVALU(MI)) { |
3720 | switch (MI.getOpcode()) { |
3721 | case AMDGPU::V_READLANE_B32: |
3722 | case AMDGPU::V_WRITELANE_B32: |
3723 | return false; |
3724 | } |
3725 | |
3726 | return true; |
3727 | } |
3728 | |
3729 | if (MI.isPreISelOpcode() || |
3730 | SIInstrInfo::isGenericOpcode(MI.getOpcode()) || |
3731 | SIInstrInfo::isSALU(MI) || |
3732 | SIInstrInfo::isSMRD(MI)) |
3733 | return false; |
3734 | |
3735 | return true; |
3736 | } |
3737 | |
3738 | static bool isSubRegOf(const SIRegisterInfo &TRI, |
3739 | const MachineOperand &SuperVec, |
3740 | const MachineOperand &SubReg) { |
3741 | if (SubReg.getReg().isPhysical()) |
3742 | return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); |
3743 | |
3744 | return SubReg.getSubReg() != AMDGPU::NoSubRegister && |
3745 | SubReg.getReg() == SuperVec.getReg(); |
3746 | } |
3747 | |
3748 | bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, |
3749 | StringRef &ErrInfo) const { |
3750 | uint16_t Opcode = MI.getOpcode(); |
3751 | if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) |
3752 | return true; |
3753 | |
3754 | const MachineFunction *MF = MI.getParent()->getParent(); |
3755 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
3756 | |
3757 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); |
3758 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); |
3759 | int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); |
3760 | |
3761 | |
3762 | const MCInstrDesc &Desc = get(Opcode); |
3763 | if (!Desc.isVariadic() && |
3764 | Desc.getNumOperands() != MI.getNumExplicitOperands()) { |
3765 | ErrInfo = "Instruction has wrong number of operands."; |
3766 | return false; |
3767 | } |
3768 | |
3769 | if (MI.isInlineAsm()) { |
3770 | |
3771 | for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands(); |
3772 | I != E; ++I) { |
3773 | const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI); |
3774 | if (!RC) |
3775 | continue; |
3776 | |
3777 | const MachineOperand &Op = MI.getOperand(I); |
3778 | if (!Op.isReg()) |
3779 | continue; |
3780 | |
3781 | Register Reg = Op.getReg(); |
3782 | if (!Reg.isVirtual() && !RC->contains(Reg)) { |
3783 | ErrInfo = "inlineasm operand has incorrect register class."; |
3784 | return false; |
3785 | } |
3786 | } |
3787 | |
3788 | return true; |
3789 | } |
3790 | |
3791 | if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) { |
3792 | ErrInfo = "missing memory operand from MIMG instruction."; |
3793 | return false; |
3794 | } |
3795 | |
3796 | |
3797 | for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { |
3798 | const MachineOperand &MO = MI.getOperand(i); |
3799 | if (MO.isFPImm()) { |
3800 | ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " |
3801 | "all fp values to integers."; |
3802 | return false; |
3803 | } |
3804 | |
3805 | int RegClass = Desc.OpInfo[i].RegClass; |
3806 | |
3807 | switch (Desc.OpInfo[i].OperandType) { |
3808 | case MCOI::OPERAND_REGISTER: |
3809 | if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) { |
3810 | ErrInfo = "Illegal immediate value for operand."; |
3811 | return false; |
3812 | } |
3813 | break; |
3814 | case AMDGPU::OPERAND_REG_IMM_INT32: |
3815 | case AMDGPU::OPERAND_REG_IMM_FP32: |
3816 | break; |
3817 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
3818 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
3819 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
3820 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
3821 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
3822 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
3823 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
3824 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
3825 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
3826 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
3827 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: { |
3828 | if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) { |
3829 | ErrInfo = "Illegal immediate value for operand."; |
3830 | return false; |
3831 | } |
3832 | break; |
3833 | } |
3834 | case MCOI::OPERAND_IMMEDIATE: |
3835 | case AMDGPU::OPERAND_KIMM32: |
3836 | |
3837 | |
3838 | |
3839 | if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { |
3840 | ErrInfo = "Expected immediate, but got non-immediate"; |
3841 | return false; |
3842 | } |
3843 | LLVM_FALLTHROUGH; |
3844 | default: |
3845 | continue; |
3846 | } |
3847 | |
3848 | if (!MO.isReg()) |
3849 | continue; |
3850 | Register Reg = MO.getReg(); |
3851 | if (!Reg) |
3852 | continue; |
3853 | |
3854 | |
3855 | |
3856 | |
3857 | |
3858 | if (ST.needsAlignedVGPRs()) { |
3859 | const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg); |
3860 | const bool IsVGPR = RI.hasVGPRs(RC); |
3861 | const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC); |
3862 | if ((IsVGPR || IsAGPR) && MO.getSubReg()) { |
3863 | const TargetRegisterClass *SubRC = |
3864 | RI.getSubRegClass(RC, MO.getSubReg()); |
3865 | RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg()); |
3866 | if (RC) |
3867 | RC = SubRC; |
3868 | } |
3869 | |
3870 | |
3871 | if (!RC || !RI.isProperlyAlignedRC(*RC)) { |
3872 | ErrInfo = "Subtarget requires even aligned vector registers"; |
3873 | return false; |
3874 | } |
3875 | } |
3876 | |
3877 | if (RegClass != -1) { |
3878 | if (Reg.isVirtual()) |
3879 | continue; |
3880 | |
3881 | const TargetRegisterClass *RC = RI.getRegClass(RegClass); |
3882 | if (!RC->contains(Reg)) { |
3883 | ErrInfo = "Operand has incorrect register class."; |
3884 | return false; |
3885 | } |
3886 | } |
3887 | } |
3888 | |
3889 | |
3890 | if (isSDWA(MI)) { |
3891 | if (!ST.hasSDWA()) { |
3892 | ErrInfo = "SDWA is not supported on this target"; |
3893 | return false; |
3894 | } |
3895 | |
3896 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); |
3897 | |
3898 | const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; |
3899 | |
3900 | for (int OpIdx : OpIndices) { |
3901 | if (OpIdx == -1) |
3902 | continue; |
3903 | const MachineOperand &MO = MI.getOperand(OpIdx); |
3904 | |
3905 | if (!ST.hasSDWAScalar()) { |
3906 | |
3907 | if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { |
3908 | ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; |
3909 | return false; |
3910 | } |
3911 | } else { |
3912 | |
3913 | if (!MO.isReg()) { |
3914 | ErrInfo = |
3915 | "Only reg allowed as operands in SDWA instructions on GFX9+"; |
3916 | return false; |
3917 | } |
3918 | } |
3919 | } |
3920 | |
3921 | if (!ST.hasSDWAOmod()) { |
3922 | |
3923 | const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); |
3924 | if (OMod != nullptr && |
3925 | (!OMod->isImm() || OMod->getImm() != 0)) { |
3926 | ErrInfo = "OMod not allowed in SDWA instructions on VI"; |
3927 | return false; |
3928 | } |
3929 | } |
3930 | |
3931 | uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); |
3932 | if (isVOPC(BasicOpcode)) { |
3933 | if (!ST.hasSDWASdst() && DstIdx != -1) { |
3934 | |
3935 | const MachineOperand &Dst = MI.getOperand(DstIdx); |
3936 | if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { |
3937 | ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; |
3938 | return false; |
3939 | } |
3940 | } else if (!ST.hasSDWAOutModsVOPC()) { |
3941 | |
3942 | const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); |
3943 | if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { |
3944 | ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI"; |
3945 | return false; |
3946 | } |
3947 | |
3948 | |
3949 | const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); |
3950 | if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { |
3951 | ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; |
3952 | return false; |
3953 | } |
3954 | } |
3955 | } |
3956 | |
3957 | const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
3958 | if (DstUnused && DstUnused->isImm() && |
3959 | DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { |
3960 | const MachineOperand &Dst = MI.getOperand(DstIdx); |
3961 | if (!Dst.isReg() || !Dst.isTied()) { |
3962 | ErrInfo = "Dst register should have tied register"; |
3963 | return false; |
3964 | } |
3965 | |
3966 | const MachineOperand &TiedMO = |
3967 | MI.getOperand(MI.findTiedOperandIdx(DstIdx)); |
3968 | if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) { |
3969 | ErrInfo = |
3970 | "Dst register should be tied to implicit use of preserved register"; |
3971 | return false; |
3972 | } else if (TiedMO.getReg().isPhysical() && |
3973 | Dst.getReg() != TiedMO.getReg()) { |
3974 | ErrInfo = "Dst register should use same physical register as preserved"; |
3975 | return false; |
3976 | } |
3977 | } |
3978 | } |
3979 | |
3980 | |
3981 | if (isMIMG(MI.getOpcode()) && !MI.mayStore()) { |
3982 | |
3983 | |
3984 | const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask); |
3985 | if (DMask) { |
3986 | uint64_t DMaskImm = DMask->getImm(); |
3987 | uint32_t RegCount = |
3988 | isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm); |
3989 | const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe); |
3990 | const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe); |
3991 | const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16); |
3992 | |
3993 | |
3994 | if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem()) |
3995 | RegCount >>= 1; |
3996 | |
3997 | |
3998 | if ((LWE && LWE->getImm()) || (TFE && TFE->getImm())) |
3999 | RegCount += 1; |
4000 | |
4001 | const uint32_t DstIdx = |
4002 | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); |
4003 | const MachineOperand &Dst = MI.getOperand(DstIdx); |
4004 | if (Dst.isReg()) { |
4005 | const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx); |
4006 | uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32; |
4007 | if (RegCount > DstSize) { |
4008 | ErrInfo = "MIMG instruction returns too many registers for dst " |
4009 | "register class"; |
4010 | return false; |
4011 | } |
4012 | } |
4013 | } |
4014 | } |
4015 | |
4016 | |
4017 | if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32 |
4018 | && (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) { |
4019 | |
4020 | |
4021 | |
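| // Count the distinct SGPRs and literal constants used by the source operands |
| // and check them against the subtarget's constant bus limit. |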
4022 | const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; |
4023 | |
4024 | unsigned ConstantBusCount = 0; |
4025 | bool UsesLiteral = false; |
4026 | const MachineOperand *LiteralVal = nullptr; |
4027 | |
4028 | if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) |
4029 | ++ConstantBusCount; |
4030 | |
4031 | SmallVector<Register, 2> SGPRsUsed; |
4032 | Register SGPRUsed; |
4033 | |
4034 | for (int OpIdx : OpIndices) { |
4035 | if (OpIdx == -1) |
4036 | break; |
4037 | const MachineOperand &MO = MI.getOperand(OpIdx); |
4038 | if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { |
4039 | if (MO.isReg()) { |
4040 | SGPRUsed = MO.getReg(); |
4041 | if (llvm::all_of(SGPRsUsed, [SGPRUsed](unsigned SGPR) { |
4042 | return SGPRUsed != SGPR; |
4043 | })) { |
4044 | ++ConstantBusCount; |
4045 | SGPRsUsed.push_back(SGPRUsed); |
4046 | } |
4047 | } else { |
4048 | if (!UsesLiteral) { |
4049 | ++ConstantBusCount; |
4050 | UsesLiteral = true; |
4051 | LiteralVal = &MO; |
4052 | } else if (!MO.isIdenticalTo(*LiteralVal)) { |
4053 | assert(isVOP3(MI)); |
4054 | ErrInfo = "VOP3 instruction uses more than one literal"; |
4055 | return false; |
4056 | } |
4057 | } |
4058 | } |
4059 | } |
4060 | |
4061 | SGPRUsed = findImplicitSGPRRead(MI); |
4062 | if (SGPRUsed != AMDGPU::NoRegister) { |
4063 | |
4064 | if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) { |
4065 | return !RI.regsOverlap(SGPRUsed, SGPR); |
4066 | })) { |
4067 | ++ConstantBusCount; |
4068 | SGPRsUsed.push_back(SGPRUsed); |
4069 | } |
4070 | } |
4071 | |
4072 | |
4073 | |
4074 | if (ConstantBusCount > ST.getConstantBusLimit(Opcode) && |
4075 | Opcode != AMDGPU::V_WRITELANE_B32) { |
4076 | ErrInfo = "VOP* instruction violates constant bus restriction"; |
4077 | return false; |
4078 | } |
4079 | |
4080 | if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) { |
4081 | ErrInfo = "VOP3 instruction uses literal"; |
4082 | return false; |
4083 | } |
4084 | } |
4085 | |
4086 | |
4087 | |
4088 | if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) { |
4089 | unsigned SGPRCount = 0; |
4090 | Register SGPRUsed = AMDGPU::NoRegister; |
4091 | |
4092 | for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) { |
4093 | if (OpIdx == -1) |
4094 | break; |
4095 | |
4096 | const MachineOperand &MO = MI.getOperand(OpIdx); |
4097 | |
4098 | if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { |
4099 | if (MO.isReg() && MO.getReg() != AMDGPU::M0) { |
4100 | if (MO.getReg() != SGPRUsed) |
4101 | ++SGPRCount; |
4102 | SGPRUsed = MO.getReg(); |
4103 | } |
4104 | } |
4105 | if (SGPRCount > ST.getConstantBusLimit(Opcode)) { |
4106 | ErrInfo = "WRITELANE instruction violates constant bus restriction"; |
4107 | return false; |
4108 | } |
4109 | } |
4110 | } |
4111 | |
4112 | |
4113 | if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 || |
4114 | Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) { |
4115 | const MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4116 | const MachineOperand &Src1 = MI.getOperand(Src1Idx); |
4117 | const MachineOperand &Src2 = MI.getOperand(Src2Idx); |
4118 | if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { |
4119 | if (!compareMachineOp(Src0, Src1) && |
4120 | !compareMachineOp(Src0, Src2)) { |
4121 | ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; |
4122 | return false; |
4123 | } |
4124 | } |
4125 | if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() & |
4126 | SISrcMods::ABS) || |
4127 | (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() & |
4128 | SISrcMods::ABS) || |
4129 | (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() & |
4130 | SISrcMods::ABS)) { |
4131 | ErrInfo = "ABS not allowed in VOP3B instructions"; |
4132 | return false; |
4133 | } |
4134 | } |
4135 | |
4136 | if (isSOP2(MI) || isSOPC(MI)) { |
4137 | const MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4138 | const MachineOperand &Src1 = MI.getOperand(Src1Idx); |
4139 | unsigned Immediates = 0; |
4140 | |
4141 | if (!Src0.isReg() && |
4142 | !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType)) |
4143 | Immediates++; |
4144 | if (!Src1.isReg() && |
4145 | !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType)) |
4146 | Immediates++; |
4147 | |
4148 | if (Immediates > 1) { |
4149 | ErrInfo = "SOP2/SOPC instruction requires too many immediate constants"; |
4150 | return false; |
4151 | } |
4152 | } |
4153 | |
4154 | if (isSOPK(MI)) { |
4155 | auto Op = getNamedOperand(MI, AMDGPU::OpName::simm16); |
4156 | if (Desc.isBranch()) { |
4157 | if (!Op->isMBB()) { |
4158 | ErrInfo = "invalid branch target for SOPK instruction"; |
4159 | return false; |
4160 | } |
4161 | } else { |
4162 | uint64_t Imm = Op->getImm(); |
4163 | if (sopkIsZext(MI)) { |
4164 | if (!isUInt<16>(Imm)) { |
4165 | ErrInfo = "invalid immediate for SOPK instruction"; |
4166 | return false; |
4167 | } |
4168 | } else { |
4169 | if (!isInt<16>(Imm)) { |
4170 | ErrInfo = "invalid immediate for SOPK instruction"; |
4171 | return false; |
4172 | } |
4173 | } |
4174 | } |
4175 | } |
4176 | |
4177 | if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 || |
4178 | Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 || |
4179 | Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || |
4180 | Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) { |
4181 | const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || |
4182 | Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64; |
4183 | |
4184 | const unsigned StaticNumOps = Desc.getNumOperands() + |
4185 | Desc.getNumImplicitUses(); |
4186 | const unsigned NumImplicitOps = IsDst ? 2 : 1; |
4187 | |
4188 | |
4189 | |
4190 | |
4191 | if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) { |
4192 | ErrInfo = "missing implicit register operands"; |
4193 | return false; |
4194 | } |
4195 | |
4196 | const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); |
4197 | if (IsDst) { |
4198 | if (!Dst->isUse()) { |
4199 | ErrInfo = "v_movreld_b32 vdst should be a use operand"; |
4200 | return false; |
4201 | } |
4202 | |
4203 | unsigned UseOpIdx; |
4204 | if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) || |
4205 | UseOpIdx != StaticNumOps + 1) { |
4206 | ErrInfo = "movrel implicit operands should be tied"; |
4207 | return false; |
4208 | } |
4209 | } |
4210 | |
4211 | const MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4212 | const MachineOperand &ImpUse |
4213 | = MI.getOperand(StaticNumOps + NumImplicitOps - 1); |
4214 | if (!ImpUse.isReg() || !ImpUse.isUse() || |
4215 | !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) { |
4216 | ErrInfo = "src0 should be subreg of implicit vector use"; |
4217 | return false; |
4218 | } |
4219 | } |
4220 | |
4221 | |
4222 | |
4223 | if (shouldReadExec(MI)) { |
4224 | if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { |
4225 | ErrInfo = "VALU instruction does not implicitly read exec mask"; |
4226 | return false; |
4227 | } |
4228 | } |
4229 | |
4230 | if (isSMRD(MI)) { |
4231 | if (MI.mayStore()) { |
4232 | |
4233 | |
4234 | const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff); |
4235 | if (Soff && Soff->getReg() != AMDGPU::M0) { |
4236 | ErrInfo = "scalar stores must use m0 as offset register"; |
4237 | return false; |
4238 | } |
4239 | } |
4240 | } |
4241 | |
4242 | if (isFLAT(MI) && !ST.hasFlatInstOffsets()) { |
4243 | const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); |
4244 | if (Offset->getImm() != 0) { |
4245 | ErrInfo = "subtarget does not support offsets in flat instructions"; |
4246 | return false; |
4247 | } |
4248 | } |
4249 | |
4250 | if (isMIMG(MI)) { |
4251 | const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim); |
4252 | if (DimOp) { |
4253 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, |
4254 | AMDGPU::OpName::vaddr0); |
4255 | int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); |
4256 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode); |
4257 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
4258 | AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); |
4259 | const AMDGPU::MIMGDimInfo *Dim = |
4260 | AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm()); |
4261 | |
4262 | if (!Dim) { |
4263 | ErrInfo = "dim is out of range"; |
4264 | return false; |
4265 | } |
4266 | |
4267 | bool IsA16 = false; |
4268 | if (ST.hasR128A16()) { |
4269 | const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128); |
4270 | IsA16 = R128A16->getImm() != 0; |
4271 | } else if (ST.hasGFX10A16()) { |
4272 | const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16); |
4273 | IsA16 = A16->getImm() != 0; |
4274 | } |
4275 | |
4276 | bool IsNSA = SRsrcIdx - VAddr0Idx > 1; |
4277 | |
4278 | unsigned AddrWords = |
4279 | AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16()); |
4280 | |
4281 | unsigned VAddrWords; |
4282 | if (IsNSA) { |
4283 | VAddrWords = SRsrcIdx - VAddr0Idx; |
4284 | } else { |
4285 | const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx); |
4286 | VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32; |
4287 | if (AddrWords > 8) |
4288 | AddrWords = 16; |
4289 | } |
4290 | |
4291 | if (VAddrWords != AddrWords) { |
4292 | LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords |
4293 | << " but got " << VAddrWords << "\n"); |
4294 | ErrInfo = "bad vaddr size"; |
4295 | return false; |
4296 | } |
4297 | } |
4298 | } |
4299 | |
4300 | const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl); |
4301 | if (DppCt) { |
4302 | using namespace AMDGPU::DPP; |
4303 | |
4304 | unsigned DC = DppCt->getImm(); |
4305 | if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 || |
4306 | DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST || |
4307 | (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) || |
4308 | (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) || |
4309 | (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) || |
4310 | (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) || |
4311 | (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) { |
4312 | ErrInfo = "Invalid dpp_ctrl value"; |
4313 | return false; |
4314 | } |
4315 | if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 && |
4316 | ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
4317 | ErrInfo = "Invalid dpp_ctrl value: " |
4318 | "wavefront shifts are not supported on GFX10+"; |
4319 | return false; |
4320 | } |
4321 | if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 && |
4322 | ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
4323 | ErrInfo = "Invalid dpp_ctrl value: " |
4324 | "broadcasts are not supported on GFX10+"; |
4325 | return false; |
4326 | } |
4327 | if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST && |
4328 | ST.getGeneration() < AMDGPUSubtarget::GFX10) { |
4329 | if (DC >= DppCtrl::ROW_NEWBCAST_FIRST && |
4330 | DC <= DppCtrl::ROW_NEWBCAST_LAST && |
4331 | !ST.hasGFX90AInsts()) { |
4332 | ErrInfo = "Invalid dpp_ctrl value: " |
4333 | "row_newbroadcast/row_share is not supported before " |
4334 | "GFX90A/GFX10"; |
4335 | return false; |
4336 | } else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) { |
4337 | ErrInfo = "Invalid dpp_ctrl value: " |
4338 | "row_share and row_xmask are not supported before GFX10"; |
4339 | return false; |
4340 | } |
4341 | } |
4342 | |
4343 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); |
4344 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); |
4345 | |
4346 | if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO && |
4347 | ((DstIdx >= 0 && |
4348 | (Desc.OpInfo[DstIdx].RegClass == AMDGPU::VReg_64RegClassID || |
4349 | Desc.OpInfo[DstIdx].RegClass == AMDGPU::VReg_64_Align2RegClassID)) || |
4350 | ((Src0Idx >= 0 && |
4351 | (Desc.OpInfo[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID || |
4352 | Desc.OpInfo[Src0Idx].RegClass == |
4353 | AMDGPU::VReg_64_Align2RegClassID)))) && |
4354 | !AMDGPU::isLegal64BitDPPControl(DC)) { |
4355 | ErrInfo = "Invalid dpp_ctrl value: " |
4356 | "64 bit dpp only support row_newbcast"; |
4357 | return false; |
4358 | } |
4359 | } |
4360 | |
4361 | if ((MI.mayStore() || MI.mayLoad()) && !isVGPRSpill(MI)) { |
4362 | const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); |
4363 | uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0 |
4364 | : AMDGPU::OpName::vdata; |
4365 | const MachineOperand *Data = getNamedOperand(MI, DataNameIdx); |
4366 | const MachineOperand *Data2 = getNamedOperand(MI, AMDGPU::OpName::data1); |
4367 | if (Data && !Data->isReg()) |
4368 | Data = nullptr; |
4369 | |
4370 | if (ST.hasGFX90AInsts()) { |
4371 | if (Dst && Data && |
4372 | (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) { |
4373 | ErrInfo = "Invalid register class: " |
4374 | "vdata and vdst should be both VGPR or AGPR"; |
4375 | return false; |
4376 | } |
4377 | if (Data && Data2 && |
4378 | (RI.isAGPR(MRI, Data->getReg()) != RI.isAGPR(MRI, Data2->getReg()))) { |
4379 | ErrInfo = "Invalid register class: " |
4380 | "both data operands should be VGPR or AGPR"; |
4381 | return false; |
4382 | } |
4383 | } else { |
4384 | if ((Dst && RI.isAGPR(MRI, Dst->getReg())) || |
4385 | (Data && RI.isAGPR(MRI, Data->getReg())) || |
4386 | (Data2 && RI.isAGPR(MRI, Data2->getReg()))) { |
4387 | ErrInfo = "Invalid register class: " |
4388 | "agpr loads and stores not supported on this GPU"; |
4389 | return false; |
4390 | } |
4391 | } |
4392 | } |
4393 | |
4394 | if (ST.needsAlignedVGPRs() && |
4395 | (MI.getOpcode() == AMDGPU::DS_GWS_INIT || |
4396 | MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || |
4397 | MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) { |
4398 | const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0); |
4399 | Register Reg = Op->getReg(); |
4400 | bool Aligned = true; |
4401 | if (Reg.isPhysical()) { |
4402 | Aligned = !(RI.getHWRegIndex(Reg) & 1); |
4403 | } else { |
4404 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg); |
4405 | Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && |
4406 | !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); |
4407 | } |
4408 | |
4409 | if (!Aligned) { |
4410 | ErrInfo = "Subtarget requires even aligned vector registers " |
4411 | "for DS_GWS instructions"; |
4412 | return false; |
4413 | } |
4414 | } |
4415 | |
4416 | return true; |
4417 | } |
4418 | |
4419 | unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { |
4420 | switch (MI.getOpcode()) { |
4421 | default: return AMDGPU::INSTRUCTION_LIST_END; |
4422 | case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; |
4423 | case AMDGPU::COPY: return AMDGPU::COPY; |
4424 | case AMDGPU::PHI: return AMDGPU::PHI; |
4425 | case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; |
4426 | case AMDGPU::WQM: return AMDGPU::WQM; |
4427 | case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM; |
4428 | case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM; |
4429 | case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM; |
4430 | case AMDGPU::S_MOV_B32: { |
4431 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
4432 | return MI.getOperand(1).isReg() || |
4433 | RI.isAGPR(MRI, MI.getOperand(0).getReg()) ? |
4434 | AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; |
4435 | } |
4436 | case AMDGPU::S_ADD_I32: |
4437 | return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; |
4438 | case AMDGPU::S_ADDC_U32: |
4439 | return AMDGPU::V_ADDC_U32_e32; |
4440 | case AMDGPU::S_SUB_I32: |
4441 | return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32; |
4442 | |
4443 | |
4444 | case AMDGPU::S_ADD_U32: |
4445 | return AMDGPU::V_ADD_CO_U32_e32; |
4446 | case AMDGPU::S_SUB_U32: |
4447 | return AMDGPU::V_SUB_CO_U32_e32; |
4448 | case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; |
4449 | case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64; |
4450 | case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64; |
4451 | case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64; |
4452 | case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; |
4453 | case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64; |
4454 | case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64; |
4455 | case AMDGPU::S_XNOR_B32: |
4456 | return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END; |
4457 | case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64; |
4458 | case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64; |
4459 | case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64; |
4460 | case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64; |
4461 | case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; |
4462 | case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64; |
4463 | case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; |
4464 | case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64; |
4465 | case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; |
4466 | case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64; |
4467 | case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64; |
4468 | case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64; |
4469 | case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64; |
4470 | case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64; |
4471 | case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; |
4472 | case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; |
4473 | case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; |
4474 | case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; |
4475 | case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; |
4476 | case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; |
4477 | case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; |
4478 | case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; |
4479 | case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; |
4480 | case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; |
4481 | case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; |
4482 | case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; |
4483 | case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; |
4484 | case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; |
4485 | case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; |
4486 | case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; |
4487 | case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32; |
4488 | case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32; |
4489 | case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; |
4490 | case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; |
4491 | case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; |
4492 | case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; |
4493 | case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; |
4494 | case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; |
4495 | } |
4496 | llvm_unreachable( |
4497 | "Unexpected scalar opcode without corresponding vector one!"); |
4498 | } |
4499 | |
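| // Narrow the combined AV_* (VGPR-or-AGPR) classes to plain VGPR classes for |
| // memory-style instructions; only GFX90A with frozen reserved registers and |
| // a non-allocatable operand keeps the wider class. |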
4500 | static unsigned adjustAllocatableRegClass(const GCNSubtarget &ST, |
4501 | const MachineRegisterInfo &MRI, |
4502 | const MCInstrDesc &TID, |
4503 | unsigned RCID, |
4504 | bool IsAllocatable) { |
4505 | if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && |
4506 | (TID.mayLoad() || TID.mayStore() || |
4507 | (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) { |
4508 | switch (RCID) { |
4509 | case AMDGPU::AV_32RegClassID: return AMDGPU::VGPR_32RegClassID; |
4510 | case AMDGPU::AV_64RegClassID: return AMDGPU::VReg_64RegClassID; |
4511 | case AMDGPU::AV_96RegClassID: return AMDGPU::VReg_96RegClassID; |
4512 | case AMDGPU::AV_128RegClassID: return AMDGPU::VReg_128RegClassID; |
4513 | case AMDGPU::AV_160RegClassID: return AMDGPU::VReg_160RegClassID; |
4514 | default: |
4515 | break; |
4516 | } |
4517 | } |
4518 | return RCID; |
4519 | } |
4520 | |
4521 | const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, |
4522 | unsigned OpNum, const TargetRegisterInfo *TRI, |
4523 | const MachineFunction &MF) |
4524 | const { |
4525 | if (OpNum >= TID.getNumOperands()) |
4526 | return nullptr; |
4527 | auto RegClass = TID.OpInfo[OpNum].RegClass; |
4528 | bool IsAllocatable = false; |
4529 | if (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::FLAT)) { |
4530 | |
4531 | |
4532 | |
4533 | |
4534 | |
4535 | |
4536 | |
4537 | const int VDstIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, |
4538 | AMDGPU::OpName::vdst); |
4539 | const int DataIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, |
4540 | (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 |
4541 | : AMDGPU::OpName::vdata); |
4542 | if (DataIdx != -1) { |
4543 | IsAllocatable = VDstIdx != -1 || |
4544 | AMDGPU::getNamedOperandIdx(TID.Opcode, |
4545 | AMDGPU::OpName::data1) != -1; |
4546 | } |
4547 | } |
4548 | RegClass = adjustAllocatableRegClass(ST, MF.getRegInfo(), TID, RegClass, |
4549 | IsAllocatable); |
4550 | return RI.getRegClass(RegClass); |
4551 | } |
4552 | |
4553 | const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, |
4554 | unsigned OpNo) const { |
4555 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
4556 | const MCInstrDesc &Desc = get(MI.getOpcode()); |
4557 | if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || |
4558 | Desc.OpInfo[OpNo].RegClass == -1) { |
4559 | Register Reg = MI.getOperand(OpNo).getReg(); |
4560 | |
4561 | if (Reg.isVirtual()) |
4562 | return MRI.getRegClass(Reg); |
4563 | return RI.getPhysRegClass(Reg); |
4564 | } |
4565 | |
4566 | unsigned RCID = Desc.OpInfo[OpNo].RegClass; |
4567 | RCID = adjustAllocatableRegClass(ST, MRI, Desc, RCID, true); |
4568 | return RI.getRegClass(RCID); |
4569 | } |
4570 | |
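| // Materialize the offending operand into a fresh VGPR-class virtual register |
| // with a COPY or MOV, then rewrite MI's operand to use that register. |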
4571 | void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { |
4572 | MachineBasicBlock::iterator I = MI; |
4573 | MachineBasicBlock *MBB = MI.getParent(); |
4574 | MachineOperand &MO = MI.getOperand(OpIdx); |
4575 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
4576 | unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass; |
4577 | const TargetRegisterClass *RC = RI.getRegClass(RCID); |
4578 | unsigned Size = RI.getRegSizeInBits(*RC); |
4579 | unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32; |
4580 | if (MO.isReg()) |
4581 | Opcode = AMDGPU::COPY; |
4582 | else if (RI.isSGPRClass(RC)) |
4583 | Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; |
4584 | |
4585 | const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); |
4586 | const TargetRegisterClass *VRC64 = RI.getVGPR64Class(); |
4587 | if (RI.getCommonSubClass(VRC64, VRC)) |
4588 | VRC = VRC64; |
4589 | else |
4590 | VRC = &AMDGPU::VGPR_32RegClass; |
4591 | |
4592 | Register Reg = MRI.createVirtualRegister(VRC); |
4593 | DebugLoc DL = MBB->findDebugLoc(I); |
4594 | BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); |
4595 | MO.ChangeToRegister(Reg, false); |
4596 | } |
4597 | |
4598 | unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, |
4599 | MachineRegisterInfo &MRI, |
4600 | MachineOperand &SuperReg, |
4601 | const TargetRegisterClass *SuperRC, |
4602 | unsigned SubIdx, |
4603 | const TargetRegisterClass *SubRC) |
4604 | const { |
4605 | MachineBasicBlock *MBB = MI->getParent(); |
4606 | DebugLoc DL = MI->getDebugLoc(); |
4607 | Register SubReg = MRI.createVirtualRegister(SubRC); |
4608 | |
4609 | if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { |
4610 | BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) |
4611 | .addReg(SuperReg.getReg(), 0, SubIdx); |
4612 | return SubReg; |
4613 | } |
4614 | |
4615 | |
4616 | |
4617 | |
4618 | |
4619 | Register NewSuperReg = MRI.createVirtualRegister(SuperRC); |
4620 | |
4621 | BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) |
4622 | .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); |
4623 | |
4624 | BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) |
4625 | .addReg(NewSuperReg, 0, SubIdx); |
4626 | |
4627 | return SubReg; |
4628 | } |
4629 | |
4630 | MachineOperand SIInstrInfo::buildExtractSubRegOrImm( |
4631 | MachineBasicBlock::iterator MII, |
4632 | MachineRegisterInfo &MRI, |
4633 | MachineOperand &Op, |
4634 | const TargetRegisterClass *SuperRC, |
4635 | unsigned SubIdx, |
4636 | const TargetRegisterClass *SubRC) const { |
4637 | if (Op.isImm()) { |
4638 | if (SubIdx == AMDGPU::sub0) |
4639 | return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm())); |
4640 | if (SubIdx == AMDGPU::sub1) |
4641 | return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32)); |
4642 | |
4643 | llvm_unreachable("Unhandled register index for immediate"); |
4644 | } |
4645 | |
4646 | unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, |
4647 | SubIdx, SubRC); |
4648 | return MachineOperand::CreateReg(SubReg, false); |
4649 | } |
4650 | |
4651 | |
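| // For an instruction with exactly three explicit operands (dst, src0, src1), |
| // removing operand 1 and re-appending it swaps src0 and src1 in place. |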
4652 | void SIInstrInfo::swapOperands(MachineInstr &Inst) const { |
4653 | assert(Inst.getNumExplicitOperands() == 3); |
4654 | MachineOperand Op1 = Inst.getOperand(1); |
4655 | Inst.RemoveOperand(1); |
4656 | Inst.addOperand(Op1); |
4657 | } |
4658 | |
4659 | bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, |
4660 | const MCOperandInfo &OpInfo, |
4661 | const MachineOperand &MO) const { |
4662 | if (!MO.isReg()) |
4663 | return false; |
4664 | |
4665 | Register Reg = MO.getReg(); |
4666 | |
4667 | const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass); |
4668 | if (Reg.isPhysical()) |
4669 | return DRC->contains(Reg); |
4670 | |
4671 | const TargetRegisterClass *RC = MRI.getRegClass(Reg); |
4672 | |
4673 | if (MO.getSubReg()) { |
4674 | const MachineFunction *MF = MO.getParent()->getParent()->getParent(); |
4675 | const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); |
4676 | if (!SuperRC) |
4677 | return false; |
4678 | |
4679 | DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); |
4680 | if (!DRC) |
4681 | return false; |
4682 | } |
4683 | return RC->hasSuperClassEq(DRC); |
4684 | } |
4685 | |
4686 | bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, |
4687 | const MCOperandInfo &OpInfo, |
4688 | const MachineOperand &MO) const { |
4689 | if (MO.isReg()) |
4690 | return isLegalRegOperand(MRI, OpInfo, MO); |
4691 | |
4692 | |
4693 | assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); |
4694 | return true; |
4695 | } |
4696 | |
4697 | bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, |
4698 | const MachineOperand *MO) const { |
4699 | const MachineFunction &MF = *MI.getParent()->getParent(); |
4700 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
4701 | const MCInstrDesc &InstDesc = MI.getDesc(); |
4702 | const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; |
4703 | const TargetRegisterClass *DefinedRC = |
4704 | OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; |
4705 | if (!MO) |
4706 | MO = &MI.getOperand(OpIdx); |
4707 | |
4708 | int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); |
4709 | int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0; |
4710 | if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { |
4711 | if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--) |
4712 | return false; |
4713 | |
4714 | SmallDenseSet<RegSubRegPair> SGPRsUsed; |
4715 | if (MO->isReg()) |
4716 | SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg())); |
4717 | |
4718 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
4719 | if (i == OpIdx) |
4720 | continue; |
4721 | const MachineOperand &Op = MI.getOperand(i); |
4722 | if (Op.isReg()) { |
4723 | RegSubRegPair SGPR(Op.getReg(), Op.getSubReg()); |
4724 | if (!SGPRsUsed.count(SGPR) && |
4725 | usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) { |
4726 | if (--ConstantBusLimit <= 0) |
4727 | return false; |
4728 | SGPRsUsed.insert(SGPR); |
4729 | } |
4730 | } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) { |
4731 | if (--ConstantBusLimit <= 0) |
4732 | return false; |
4733 | } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) && |
4734 | isLiteralConstantLike(Op, InstDesc.OpInfo[i])) { |
4735 | if (!VOP3LiteralLimit--) |
4736 | return false; |
4737 | if (--ConstantBusLimit <= 0) |
4738 | return false; |
4739 | } |
4740 | } |
4741 | } |
4742 | |
4743 | if (MO->isReg()) { |
4744 | assert(DefinedRC); |
4745 | if (!isLegalRegOperand(MRI, OpInfo, *MO)) |
4746 | return false; |
4747 | bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); |
4748 | if (IsAGPR && !ST.hasMAIInsts()) |
4749 | return false; |
4750 | unsigned Opc = MI.getOpcode(); |
4751 | if (IsAGPR && |
4752 | (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && |
4753 | (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc))) |
4754 | return false; |
4755 | |
4756 | const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); |
4757 | const int DataIdx = AMDGPU::getNamedOperandIdx(Opc, |
4758 | isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); |
4759 | if ((int)OpIdx == VDstIdx && DataIdx != -1 && |
4760 | MI.getOperand(DataIdx).isReg() && |
4761 | RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR) |
4762 | return false; |
4763 | if ((int)OpIdx == DataIdx) { |
4764 | if (VDstIdx != -1 && |
4765 | RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR) |
4766 | return false; |
4767 | |
4768 | const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, |
4769 | AMDGPU::OpName::data1); |
4770 | if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() && |
4771 | RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) |
4772 | return false; |
4773 | } |
4774 | if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && |
4775 | (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && |
4776 | RI.isSGPRReg(MRI, MO->getReg())) |
4777 | return false; |
4778 | return true; |
4779 | } |
4780 | |
4781 | // Non-register operands are treated like immediates. |
4782 | assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal()); |
4783 | |
4784 | if (!DefinedRC) { |
4785 | // No register class is imposed on this operand, so the immediate-like |
4786 | return true; |
4787 | } |
4788 | |
4789 | return isImmOperandLegal(MI, OpIdx, *MO); |
4790 | } |
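// A minimal usage sketch (the helper name canCommuteIntoSrc0 is hypothetical):
// a common pattern is to ask whether an operand would be legal in a different
// source slot before actually rewriting the instruction, for example when
// deciding whether commuting would fix an illegal VOP operand.
static bool canCommuteIntoSrc0(const SIInstrInfo &TII, const MachineInstr &MI) {
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  if (Src0Idx == -1 || Src1Idx == -1)
    return false;
  // Would src1's current value be accepted if it were placed in the src0 slot?
  return TII.isOperandLegal(MI, Src0Idx, &MI.getOperand(Src1Idx));
}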
4791 | |
4792 | void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, |
4793 | MachineInstr &MI) const { |
4794 | unsigned Opc = MI.getOpcode(); |
4795 | const MCInstrDesc &InstrDesc = get(Opc); |
4796 | |
4797 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); |
4798 | MachineOperand &Src0 = MI.getOperand(Src0Idx); |
4799 | |
4800 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); |
4801 | MachineOperand &Src1 = MI.getOperand(Src1Idx); |
4802 | |
4803 | // An implicit SGPR read (e.g. VCC for the carry instructions) also takes a |
4804 | // constant bus slot, so src0 may have to be moved into a VGPR first. |
4805 | bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; |
4806 | if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && |
4807 | Src0.isReg() && (RI.isSGPRReg(MRI, Src0.getReg()) || |
4808 | isLiteralConstantLike(Src0, InstrDesc.OpInfo[Src0Idx]))) |
4809 | legalizeOpWithMove(MI, Src0Idx); |
4810 | |
4811 | // Special case: V_WRITELANE_B32 only accepts an SGPR (or M0 / immediate) |
4812 | // for both sources. VGPR inputs are read back with V_READFIRSTLANE_B32 |
4813 | // rather than moved with legalizeOpWithMove. |
4814 | if (Opc == AMDGPU::V_WRITELANE_B32) { |
4815 | const DebugLoc &DL = MI.getDebugLoc(); |
4816 | if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) { |
4817 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4818 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4819 | .add(Src0); |
4820 | Src0.ChangeToRegister(Reg, false); |
4821 | } |
4822 | if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { |
4823 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4824 | const DebugLoc &DL = MI.getDebugLoc(); |
4825 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4826 | .add(Src1); |
4827 | Src1.ChangeToRegister(Reg, false); |
4828 | } |
4829 | return; |
4830 | } |
4831 | |
4832 | |
4833 | if (Src0.isReg() && RI.isAGPR(MRI, Src0.getReg())) |
4834 | legalizeOpWithMove(MI, Src0Idx); |
4835 | |
4836 | if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg())) |
4837 | legalizeOpWithMove(MI, Src1Idx); |
4838 | |
4839 | // VOP2 src0 already accepts SGPRs and immediates, so only src1 needs more |
4840 | // attention. If src1 is already a legal register operand, we are done. |
4841 | if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) |
4842 | return; |
4843 | |
4844 | // Special case: V_READLANE_B32 only accepts an SGPR or immediate for the |
4845 | // lane-select operand. Fix up a VGPR input with V_READFIRSTLANE_B32, |
4846 | // assuming the lane select is uniform. |
4847 | if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && |
4848 | RI.isVGPR(MRI, Src1.getReg())) { |
4849 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4850 | const DebugLoc &DL = MI.getDebugLoc(); |
4851 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4852 | .add(Src1); |
4853 | Src1.ChangeToRegister(Reg, false); |
4854 | return; |
4855 | } |
4856 | |
4857 | // commuteInstruction is not used here because it commutes whenever it can, |
4858 | // while we only want to commute when that actually makes the operands |
4859 | // legal. If the instruction is not commutable, or an implicit SGPR read |
4860 | // already uses the constant bus, the only option is to move src1 to a VGPR. |
4861 | if (HasImplicitSGPR || !MI.isCommutable()) { |
4862 | legalizeOpWithMove(MI, Src1Idx); |
4863 | return; |
4864 | } |
4865 | |
4866 | // Try commuting instead of inserting a copy. This only helps when src1 |
4867 | // could live in the src0 slot (it is an immediate or a register) and src0 |
4868 | // would be a legal src1 operand after the swap; otherwise fall back to |
4869 | // moving src1 into a VGPR. |
4870 | |
4871 | if ((!Src1.isImm() && !Src1.isReg()) || |
4872 | !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { |
4873 | legalizeOpWithMove(MI, Src1Idx); |
4874 | return; |
4875 | } |
4876 | |
4877 | int CommutedOpc = commuteOpcode(MI); |
4878 | if (CommutedOpc == -1) { |
4879 | legalizeOpWithMove(MI, Src1Idx); |
4880 | return; |
4881 | } |
4882 | |
4883 | MI.setDesc(get(CommutedOpc)); |
4884 | |
4885 | Register Src0Reg = Src0.getReg(); |
4886 | unsigned Src0SubReg = Src0.getSubReg(); |
4887 | bool Src0Kill = Src0.isKill(); |
4888 | |
4889 | if (Src1.isImm()) |
4890 | Src0.ChangeToImmediate(Src1.getImm()); |
4891 | else if (Src1.isReg()) { |
4892 | Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); |
4893 | Src0.setSubReg(Src1.getSubReg()); |
4894 | } else |
4895 | llvm_unreachable("Should only have register or immediate operands"); |
4896 | |
4897 | Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); |
4898 | Src1.setSubReg(Src0SubReg); |
4899 | fixImplicitOperands(MI); |
4900 | } |
4901 | |
4902 | // Legalize VOP3 operands. Any operand type is allowed in any source slot, |
4903 | // but only a limited number of SGPRs and literals may be used. |
4904 | void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, |
4905 | MachineInstr &MI) const { |
4906 | unsigned Opc = MI.getOpcode(); |
4907 | |
4908 | int VOP3Idx[3] = { |
4909 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), |
4910 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), |
4911 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) |
4912 | }; |
4913 | |
4914 | if (Opc == AMDGPU::V_PERMLANE16_B32_e64 || |
4915 | Opc == AMDGPU::V_PERMLANEX16_B32_e64) { |
4916 | |
4917 | MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]); |
4918 | MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); |
4919 | const DebugLoc &DL = MI.getDebugLoc(); |
4920 | if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { |
4921 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4922 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4923 | .add(Src1); |
4924 | Src1.ChangeToRegister(Reg, false); |
4925 | } |
4926 | if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { |
4927 | Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
4928 | BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) |
4929 | .add(Src2); |
4930 | Src2.ChangeToRegister(Reg, false); |
4931 | } |
4932 | } |
4933 | |
4934 | |
4935 | int ConstantBusLimit = ST.getConstantBusLimit(Opc); |
4936 | int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0; |
4937 | SmallDenseSet<unsigned> SGPRsUsed; |
4938 | Register SGPRReg = findUsedSGPR(MI, VOP3Idx); |
4939 | if (SGPRReg != AMDGPU::NoRegister) { |
4940 | SGPRsUsed.insert(SGPRReg); |
4941 | --ConstantBusLimit; |
4942 | } |
4943 | |
4944 | for (unsigned i = 0; i < 3; ++i) { |
4945 | int Idx = VOP3Idx[i]; |
4946 | if (Idx == -1) |
4947 | break; |
4948 | MachineOperand &MO = MI.getOperand(Idx); |
4949 | |
4950 | if (!MO.isReg()) { |
4951 | if (!isLiteralConstantLike(MO, get(Opc).OpInfo[Idx])) |
4952 | continue; |
4953 | |
4954 | if (LiteralLimit > 0 && ConstantBusLimit > 0) { |
4955 | --LiteralLimit; |
4956 | --ConstantBusLimit; |
4957 | continue; |
4958 | } |
4959 | |
4960 | --LiteralLimit; |
4961 | --ConstantBusLimit; |
4962 | legalizeOpWithMove(MI, Idx); |
4963 | continue; |
4964 | } |
4965 | |
4966 | if (RI.hasAGPRs(MRI.getRegClass(MO.getReg())) && |
4967 | !isOperandLegal(MI, Idx, &MO)) { |
4968 | legalizeOpWithMove(MI, Idx); |
4969 | continue; |
4970 | } |
4971 | |
4972 | if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) |
4973 | continue; |
4974 | |
4975 | // An SGPR that has already been counted does not consume another constant |
4976 | // bus slot; only the first use of each distinct SGPR does. |
4977 | if (SGPRsUsed.count(MO.getReg())) |
4978 | continue; |
4979 | if (ConstantBusLimit > 0) { |
4980 | SGPRsUsed.insert(MO.getReg()); |
4981 | --ConstantBusLimit; |
4982 | continue; |
4983 | } |
4984 | |
4985 | // The operand is an SGPR but the constant bus budget is exhausted, so |
4986 | // legalize it by copying it into a VGPR. |
4987 | legalizeOpWithMove(MI, Idx); |
4988 | } |
4989 | } |
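// Worked example of the accounting above, assuming a target where
// getConstantBusLimit() returns 2 and hasVOP3Literal() is true, with operands
// src0 = SGPR a, src1 = SGPR b, src2 = literal: findUsedSGPR reserves one slot
// for one of the SGPRs, the other SGPR takes the remaining slot, and when the
// literal is reached the constant bus is already exhausted, so it is rewritten
// into a VGPR with legalizeOpWithMove even though a literal slot was still
// available.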
4990 | |
4991 | Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, |
4992 | MachineRegisterInfo &MRI) const { |
4993 | const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); |
4994 | const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); |
4995 | Register DstReg = MRI.createVirtualRegister(SRC); |
4996 | unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; |
4997 | |
4998 | if (RI.hasAGPRs(VRC)) { |
4999 | VRC = RI.getEquivalentVGPRClass(VRC); |
5000 | Register NewSrcReg = MRI.createVirtualRegister(VRC); |
5001 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5002 | get(TargetOpcode::COPY), NewSrcReg) |
5003 | .addReg(SrcReg); |
5004 | SrcReg = NewSrcReg; |
5005 | } |
5006 | |
5007 | if (SubRegs == 1) { |
5008 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5009 | get(AMDGPU::V_READFIRSTLANE_B32), DstReg) |
5010 | .addReg(SrcReg); |
5011 | return DstReg; |
5012 | } |
5013 | |
5014 | SmallVector<unsigned, 8> SRegs; |
5015 | for (unsigned i = 0; i < SubRegs; ++i) { |
5016 | Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5017 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5018 | get(AMDGPU::V_READFIRSTLANE_B32), SGPR) |
5019 | .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); |
5020 | SRegs.push_back(SGPR); |
5021 | } |
5022 | |
5023 | MachineInstrBuilder MIB = |
5024 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), |
5025 | get(AMDGPU::REG_SEQUENCE), DstReg); |
5026 | for (unsigned i = 0; i < SubRegs; ++i) { |
5027 | MIB.addReg(SRegs[i]); |
5028 | MIB.addImm(RI.getSubRegFromChannel(i)); |
5029 | } |
5030 | return DstReg; |
5031 | } |
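// Rough shape of the expansion for a 64-bit VGPR (or AGPR) source, shown as
// illustrative MIR; the virtual register names are made up:
//
//   %lo:sgpr_32 = V_READFIRSTLANE_B32 %vsrc.sub0
//   %hi:sgpr_32 = V_READFIRSTLANE_B32 %vsrc.sub1
//   %dst:sreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1
//
// An AGPR source is first copied into an equivalent VGPR class, since
// V_READFIRSTLANE_B32 only reads VGPRs.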
5032 | |
5033 | void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, |
5034 | MachineInstr &MI) const { |
5035 | // The sbase and soff operands of an SMRD must be SGPRs. If either ended up |
5036 | // in a vector register, read it back with V_READFIRSTLANE_B32; this is |
5037 | // safe because loads are only selected to SMRD when the address is known |
5038 | // to be uniform, so every active lane holds the same value. |
5039 | |
5040 | MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); |
5041 | if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { |
5042 | Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); |
5043 | SBase->setReg(SGPR); |
5044 | } |
5045 | MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff); |
5046 | if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) { |
5047 | Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI); |
5048 | SOff->setReg(SGPR); |
5049 | } |
5050 | } |
5051 | |
5052 | bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { |
5053 | unsigned Opc = Inst.getOpcode(); |
5054 | int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr); |
5055 | if (OldSAddrIdx < 0) |
5056 | return false; |
5057 | |
5058 | assert(isSegmentSpecificFLAT(Inst)); |
5059 | |
5060 | int NewOpc = AMDGPU::getGlobalVaddrOp(Opc); |
5061 | if (NewOpc < 0) |
5062 | NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc); |
5063 | if (NewOpc < 0) |
5064 | return false; |
5065 | |
5066 | MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); |
5067 | MachineOperand &SAddr = Inst.getOperand(OldSAddrIdx); |
5068 | if (RI.isSGPRReg(MRI, SAddr.getReg())) |
5069 | return false; |
5070 | |
5071 | int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr); |
5072 | if (NewVAddrIdx < 0) |
5073 | return false; |
5074 | |
5075 | int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); |
5076 | // If a vaddr operand is present it must be a materialized zero; otherwise |
5077 | // the saddr value cannot simply be moved into its place. |
5078 | MachineInstr *VAddrDef = nullptr; |
5079 | if (OldVAddrIdx >= 0) { |
5080 | MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); |
5081 | VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg()); |
5082 | if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 || |
5083 | !VAddrDef->getOperand(1).isImm() || |
5084 | VAddrDef->getOperand(1).getImm() != 0) |
5085 | return false; |
5086 | } |
5087 | |
5088 | const MCInstrDesc &NewDesc = get(NewOpc); |
5089 | Inst.setDesc(NewDesc); |
5090 | |
5091 | // Callers expect their iterator to this instruction to stay valid, so the |
5092 | // instruction is rewritten in place rather than replaced. |
5093 | if (OldVAddrIdx == NewVAddrIdx) { |
5094 | MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx); |
5095 | |
5096 | MRI.removeRegOperandFromUseList(&NewVAddr); |
5097 | MRI.moveOperands(&NewVAddr, &SAddr, 1); |
5098 | Inst.RemoveOperand(OldSAddrIdx); |
5099 | // Re-add the operand to the use list now that it holds the pointer that |
5100 | // used to be in saddr; otherwise the new vaddr would be missing from it. |
5101 | MRI.removeRegOperandFromUseList(&NewVAddr); |
5102 | MRI.addRegOperandToUseList(&NewVAddr); |
5103 | } else { |
5104 | assert(OldSAddrIdx == NewVAddrIdx); |
5105 | |
5106 | if (OldVAddrIdx >= 0) { |
5107 | int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc, |
5108 | AMDGPU::OpName::vdst_in); |
5109 | // RemoveOperand does not fix up tied-operand indexes as it removes |
5110 | // entries, so the tie has to be broken while the original operands are |
5111 | // still in place and re-established afterwards. |
5112 | if (NewVDstIn != -1) { |
5113 | int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); |
5114 | Inst.untieRegOperand(OldVDstIn); |
5115 | } |
5116 | |
5117 | Inst.RemoveOperand(OldVAddrIdx); |
5118 | |
5119 | if (NewVDstIn != -1) { |
5120 | int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); |
5121 | Inst.tieOperands(NewVDst, NewVDstIn); |
5122 | } |
5123 | } |
5124 | } |
5125 | |
5126 | if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg())) |
5127 | VAddrDef->eraseFromParent(); |
5128 | |
5129 | return true; |
5130 | } |
5131 | |
5132 | |
5133 | void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI, |
5134 | MachineInstr &MI) const { |
5135 | if (!isSegmentSpecificFLAT(MI)) |
5136 | return; |
5137 | |
5138 | // The saddr operand must be an SGPR. If it is not, either switch the |
5139 | // instruction to its vaddr form or read the value back into an SGPR. |
5140 | MachineOperand *SAddr = getNamedOperand(MI, AMDGPU::OpName::saddr); |
5141 | if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg()))) |
5142 | return; |
5143 | |
5144 | if (moveFlatAddrToVGPR(MI)) |
5145 | return; |
5146 | |
5147 | Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI); |
5148 | SAddr->setReg(ToSGPR); |
5149 | } |
5150 | |
5151 | void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, |
5152 | MachineBasicBlock::iterator I, |
5153 | const TargetRegisterClass *DstRC, |
5154 | MachineOperand &Op, |
5155 | MachineRegisterInfo &MRI, |
5156 | const DebugLoc &DL) const { |
5157 | Register OpReg = Op.getReg(); |
5158 | unsigned OpSubReg = Op.getSubReg(); |
5159 | |
5160 | const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( |
5161 | RI.getRegClassForReg(MRI, OpReg), OpSubReg); |
5162 | |
5163 | |
5164 | if (DstRC == OpRC) |
5165 | return; |
5166 | |
5167 | Register DstReg = MRI.createVirtualRegister(DstRC); |
5168 | MachineInstr *Copy = |
5169 | BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); |
5170 | |
5171 | Op.setReg(DstReg); |
5172 | Op.setSubReg(0); |
5173 | |
5174 | MachineInstr *Def = MRI.getVRegDef(OpReg); |
5175 | if (!Def) |
5176 | return; |
5177 | |
5178 | |
5179 | if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) |
5180 | FoldImmediate(*Copy, *Def, OpReg, &MRI); |
5181 | |
5182 | bool ImpDef = Def->isImplicitDef(); |
5183 | while (!ImpDef && Def && Def->isCopy()) { |
5184 | if (Def->getOperand(1).getReg().isPhysical()) |
5185 | break; |
5186 | Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg()); |
5187 | ImpDef = Def && Def->isImplicitDef(); |
5188 | } |
5189 | if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && |
5190 | !ImpDef) |
5191 | Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); |
5192 | } |
5193 | |
5194 | // Emit the body of a waterfall loop: the wrapped instructions execute once |
5195 | // for every distinct value of Rsrc across the active lanes. In the best |
5196 | // case this is a single iteration, in the worst case one per lane. |
5197 | static void |
5198 | emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, |
5199 | MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, |
5200 | const DebugLoc &DL, MachineOperand &Rsrc) { |
5201 | MachineFunction &MF = *OrigBB.getParent(); |
5202 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
5203 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
5204 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
5205 | unsigned SaveExecOpc = |
5206 | ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; |
5207 | unsigned XorTermOpc = |
5208 | ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term; |
5209 | unsigned AndOpc = |
5210 | ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; |
5211 | const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5212 | |
5213 | MachineBasicBlock::iterator I = LoopBB.begin(); |
5214 | |
5215 | SmallVector<Register, 8> ReadlanePieces; |
5216 | Register CondReg = AMDGPU::NoRegister; |
5217 | |
5218 | Register VRsrc = Rsrc.getReg(); |
5219 | unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef()); |
5220 | |
5221 | unsigned RegSize = TRI->getRegSizeInBits(Rsrc.getReg(), MRI); |
5222 | unsigned NumSubRegs = RegSize / 32; |
5223 | assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 && "Unhandled register size"); |
5224 | |
5225 | for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) { |
5226 | |
5227 | Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5228 | Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5229 | |
5230 | |
5231 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo) |
5232 | .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx)); |
5233 | |
5234 | |
5235 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi) |
5236 | .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx + 1)); |
5237 | |
5238 | ReadlanePieces.push_back(CurRegLo); |
5239 | ReadlanePieces.push_back(CurRegHi); |
5240 | |
5241 | |
5242 | Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); |
5243 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg) |
5244 | .addReg(CurRegLo) |
5245 | .addImm(AMDGPU::sub0) |
5246 | .addReg(CurRegHi) |
5247 | .addImm(AMDGPU::sub1); |
5248 | |
5249 | Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); |
5250 | auto Cmp = |
5251 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), NewCondReg) |
5252 | .addReg(CurReg); |
5253 | if (NumSubRegs <= 2) |
5254 | Cmp.addReg(VRsrc); |
5255 | else |
5256 | Cmp.addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx, 2)); |
5257 | |
5258 | |
5259 | if (CondReg == AMDGPU::NoRegister) |
5260 | CondReg = NewCondReg; |
5261 | else { |
5262 | Register AndReg = MRI.createVirtualRegister(BoolXExecRC); |
5263 | BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg) |
5264 | .addReg(CondReg) |
5265 | .addReg(NewCondReg); |
5266 | CondReg = AndReg; |
5267 | } |
5268 | } |
5269 | |
5270 | auto SRsrcRC = TRI->getEquivalentSGPRClass(MRI.getRegClass(VRsrc)); |
5271 | Register SRsrc = MRI.createVirtualRegister(SRsrcRC); |
5272 | |
5273 | |
5274 | auto Merge = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc); |
5275 | unsigned Channel = 0; |
5276 | for (Register Piece : ReadlanePieces) { |
5277 | Merge.addReg(Piece) |
5278 | .addImm(TRI->getSubRegFromChannel(Channel++)); |
5279 | } |
5280 | |
5281 | |
5282 | Rsrc.setReg(SRsrc); |
5283 | Rsrc.setIsKill(true); |
5284 | |
5285 | Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); |
5286 | MRI.setSimpleHint(SaveExec, CondReg); |
5287 | |
5288 | |
5289 | BuildMI(LoopBB, I, DL, TII.get(SaveExecOpc), SaveExec) |
5290 | .addReg(CondReg, RegState::Kill); |
5291 | |
5292 | |
5293 | I = LoopBB.end(); |
5294 | |
5295 | |
5296 | BuildMI(LoopBB, I, DL, TII.get(XorTermOpc), Exec) |
5297 | .addReg(Exec) |
5298 | .addReg(SaveExec); |
5299 | |
5300 | BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); |
5301 | } |
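// Per iteration the loop body built above does, in order: read the resource
// 64 bits at a time with V_READFIRSTLANE_B32, compare each 64-bit chunk
// against the VGPR original with V_CMP_EQ_U64 and AND the results together,
// reassemble the scalar copy with REG_SEQUENCE, restrict EXEC to the matching
// lanes with S_AND_SAVEEXEC, run the wrapped instruction, then the terminating
// S_XOR turns those lanes off before SI_WATERFALL_LOOP branches back while any
// lanes remain.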
5302 | |
5303 | // Build a waterfall loop around the range [Begin, End), replacing the VGPR |
5304 | // resource operand Rsrc with an SGPR value by iterating over every distinct |
5305 | // value across the active lanes. Returns the new loop block. |
5306 | static MachineBasicBlock * |
5307 | loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, |
5308 | MachineOperand &Rsrc, MachineDominatorTree *MDT, |
5309 | MachineBasicBlock::iterator Begin = nullptr, |
5310 | MachineBasicBlock::iterator End = nullptr) { |
5311 | MachineBasicBlock &MBB = *MI.getParent(); |
5312 | MachineFunction &MF = *MBB.getParent(); |
5313 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
5314 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
5315 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
5316 | if (!Begin.isValid()) |
5317 | Begin = &MI; |
5318 | if (!End.isValid()) { |
5319 | End = &MI; |
5320 | ++End; |
5321 | } |
5322 | const DebugLoc &DL = MI.getDebugLoc(); |
5323 | unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
5324 | unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
5325 | const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5326 | |
5327 | Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); |
5328 | |
5329 | |
5330 | BuildMI(MBB, Begin, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec); |
5331 | |
5332 | // Kill flags on values used inside the region become invalid once the |
5333 | // region is re-executed in a loop, so clear them on every use. |
5334 | MachineBasicBlock::iterator AfterMI = MI; |
5335 | ++AfterMI; |
5336 | for (auto I = Begin; I != AfterMI; I++) { |
5337 | for (auto &MO : I->uses()) { |
5338 | if (MO.isReg() && MO.isUse()) { |
5339 | MRI.clearKillFlags(MO.getReg()); |
5340 | } |
5341 | } |
5342 | } |
5343 | |
5344 | // To insert the loop the block has to be split: everything from Begin to |
5345 | // End goes into the new loop block, the rest into a remainder block. |
5346 | MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock(); |
5347 | MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock(); |
5348 | MachineFunction::iterator MBBI(MBB); |
5349 | ++MBBI; |
5350 | |
5351 | MF.insert(MBBI, LoopBB); |
5352 | MF.insert(MBBI, RemainderBB); |
5353 | |
5354 | LoopBB->addSuccessor(LoopBB); |
5355 | LoopBB->addSuccessor(RemainderBB); |
5356 | |
5357 | |
5358 | |
5359 | RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); |
5360 | RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end()); |
5361 | LoopBB->splice(LoopBB->begin(), &MBB, Begin, MBB.end()); |
5362 | |
5363 | MBB.addSuccessor(LoopBB); |
5364 | |
5365 | // Update dominator information: MBB immediately dominates the new loop |
5366 | // block, the loop block immediately dominates the remainder block, and any |
5367 | // successor that used to be immediately dominated by MBB is now dominated |
5368 | // by the remainder block instead. |
5369 | if (MDT) { |
5370 | MDT->addNewBlock(LoopBB, &MBB); |
5371 | MDT->addNewBlock(RemainderBB, LoopBB); |
5372 | for (auto &Succ : RemainderBB->successors()) { |
5373 | if (MDT->properlyDominates(&MBB, Succ)) { |
5374 | MDT->changeImmediateDominator(Succ, RemainderBB); |
5375 | } |
5376 | } |
5377 | } |
5378 | |
5379 | emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc); |
5380 | |
5381 | // Restore the original EXEC mask at the top of the remainder block. |
5382 | MachineBasicBlock::iterator First = RemainderBB->begin(); |
5383 | BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec); |
5384 | return LoopBB; |
5385 | } |
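// Resulting control flow, sketched with illustrative block names:
//
//   MBB:          ... S_MOV saves EXEC ...
//   LoopBB:       readfirstlane / compare / S_AND_SAVEEXEC, [Begin, End),
//                 S_XOR_term, SI_WATERFALL_LOOP -> LoopBB or fall through
//   RemainderBB:  restore EXEC, rest of the original block
//
// MBB falls through to LoopBB, LoopBB branches back to itself until every
// distinct resource value has been handled, and RemainderBB inherits the
// original successors of MBB.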
5386 | |
5387 | // Extract the 64-bit pointer from Rsrc and return it together with a |
5388 | static std::tuple<unsigned, unsigned> |
5389 | extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { |
5390 | MachineBasicBlock &MBB = *MI.getParent(); |
5391 | MachineFunction &MF = *MBB.getParent(); |
5392 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
5393 | |
5394 | |
5395 | unsigned RsrcPtr = |
5396 | TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass, |
5397 | AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); |
5398 | |
5399 | |
5400 | Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); |
5401 | Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5402 | Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); |
5403 | Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); |
5404 | uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat(); |
5405 | |
5406 | |
5407 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64) |
5408 | .addImm(0); |
5409 | |
5410 | |
5411 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo) |
5412 | .addImm(RsrcDataFormat & 0xFFFFFFFF); |
5413 | |
5414 | |
5415 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi) |
5416 | .addImm(RsrcDataFormat >> 32); |
5417 | |
5418 | |
5419 | BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc) |
5420 | .addReg(Zero64) |
5421 | .addImm(AMDGPU::sub0_sub1) |
5422 | .addReg(SRsrcFormatLo) |
5423 | .addImm(AMDGPU::sub2) |
5424 | .addReg(SRsrcFormatHi) |
5425 | .addImm(AMDGPU::sub3); |
5426 | |
5427 | return std::make_tuple(RsrcPtr, NewSRsrc); |
5428 | } |
5429 | |
5430 | MachineBasicBlock * |
5431 | SIInstrInfo::legalizeOperands(MachineInstr &MI, |
5432 | MachineDominatorTree *MDT) const { |
5433 | MachineFunction &MF = *MI.getParent()->getParent(); |
5434 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
5435 | MachineBasicBlock *CreatedBB = nullptr; |
5436 | |
5437 | |
5438 | if (isVOP2(MI) || isVOPC(MI)) { |
5439 | legalizeOperandsVOP2(MRI, MI); |
5440 | return CreatedBB; |
5441 | } |
5442 | |
5443 | |
5444 | if (isVOP3(MI)) { |
5445 | legalizeOperandsVOP3(MRI, MI); |
5446 | return CreatedBB; |
5447 | } |
5448 | |
5449 | |
5450 | if (isSMRD(MI)) { |
5451 | legalizeOperandsSMRD(MRI, MI); |
5452 | return CreatedBB; |
5453 | } |
5454 | |
5455 | |
5456 | if (isFLAT(MI)) { |
5457 | legalizeOperandsFLAT(MRI, MI); |
5458 | return CreatedBB; |
5459 | } |
5460 | |
5461 | // Legalize PHIs (and, below, REG_SEQUENCE): all inputs must end up in the |
5462 | // same kind of register class as the result, otherwise illegal VGPR<->SGPR |
5463 | // copies would be created on the incoming edges. |
5464 | if (MI.getOpcode() == AMDGPU::PHI) { |
5465 | const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; |
5466 | for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { |
5467 | if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual()) |
5468 | continue; |
5469 | const TargetRegisterClass *OpRC = |
5470 | MRI.getRegClass(MI.getOperand(i).getReg()); |
5471 | if (RI.hasVectorRegisters(OpRC)) { |
5472 | VRC = OpRC; |
5473 | } else { |
5474 | SRC = OpRC; |
5475 | } |
5476 | } |
5477 | // If any incoming value lives in a vector register (or the result is not |
5478 | // an SGPR), every incoming value has to be moved into an equivalent vector |
5479 | // class; mixing scalar and vector inputs would otherwise produce illegal |
5480 | // VGPR-to-SGPR copies once the PHI is lowered. |
5481 | if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { |
5482 | if (!VRC) { |
5483 | assert(SRC); |
5484 | if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) { |
5485 | VRC = &AMDGPU::VReg_1RegClass; |
5486 | } else |
5487 | VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) |
5488 | ? RI.getEquivalentAGPRClass(SRC) |
5489 | : RI.getEquivalentVGPRClass(SRC); |
5490 | } else { |
5491 | VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) |
5492 | ? RI.getEquivalentAGPRClass(VRC) |
5493 | : RI.getEquivalentVGPRClass(VRC); |
5494 | } |
5495 | RC = VRC; |
5496 | } else { |
5497 | RC = SRC; |
5498 | } |
5499 | |
5500 | |
5501 | for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { |
5502 | MachineOperand &Op = MI.getOperand(I); |
5503 | if (!Op.isReg() || !Op.getReg().isVirtual()) |
5504 | continue; |
5505 | |
5506 | |
5507 | MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB(); |
5508 | MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); |
5509 | |
5510 | // legalizeGenericOperand skips no-op copies whose source and destination |
5511 | // already share a register class, which would confuse later passes. |
5512 | legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc()); |
5513 | } |
5514 | } |
5515 | |
5516 | // REG_SEQUENCE does not strictly require legalization, but when the result |
5517 | // class is a VGPR class any SGPR inputs are copied into VGPRs so the whole |
5518 | // sequence stays within one register bank. |
5519 | if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { |
5520 | MachineBasicBlock *MBB = MI.getParent(); |
5521 | const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); |
5522 | if (RI.hasVGPRs(DstRC)) { |
5523 | // Update all the operands so they are VGPR register classes. They may not |
5524 | // all end up in the same class because REG_SEQUENCE allows mixing |
5525 | // sub-register index widths, e.g. sub0_sub1 + sub2 + sub3. |
5526 | for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { |
5527 | MachineOperand &Op = MI.getOperand(I); |
5528 | if (!Op.isReg() || !Op.getReg().isVirtual()) |
5529 | continue; |
5530 | |
5531 | const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); |
5532 | const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); |
5533 | if (VRC == OpRC) |
5534 | continue; |
5535 | |
5536 | legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc()); |
5537 | Op.setIsKill(); |
5538 | } |
5539 | } |
5540 | |
5541 | return CreatedBB; |
5542 | } |
5543 | |
5544 | // Legalize INSERT_SUBREG: if the source register class differs from the |
5545 | // destination class, copy the source into the destination's class first. |
5546 | if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { |
5547 | Register Dst = MI.getOperand(0).getReg(); |
5548 | Register Src0 = MI.getOperand(1).getReg(); |
5549 | const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); |
5550 | const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); |
5551 | if (DstRC != Src0RC) { |
5552 | MachineBasicBlock *MBB = MI.getParent(); |
5553 | MachineOperand &Op = MI.getOperand(1); |
5554 | legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc()); |
5555 | } |
5556 | return CreatedBB; |
5557 | } |
5558 | |
5559 | // Legalize SI_INIT_M0: M0 can only be written from an SGPR, so a vector |
5560 | if (MI.getOpcode() == AMDGPU::SI_INIT_M0) { |
5561 | MachineOperand &Src = MI.getOperand(0); |
5562 | if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg()))) |
5563 | Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI)); |
5564 | return CreatedBB; |
5565 | } |
5566 | |
5567 | // Legalize MIMG instructions, and MUBUF/MTBUF instructions in graphics |
5568 | // shaders. Shaders only create buffer instructions through intrinsics or |
5569 | // scratch access, so the resource (srsrc) and sampler (ssamp) operands |
5570 | // never need the addr64 conversion below; a waterfall loop over the |
5571 | // non-scalar operand is sufficient. |
5572 | if (isMIMG(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) && |
5573 | (isMUBUF(MI) || isMTBUF(MI)))) { |
5574 | MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); |
5575 | if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) |
5576 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *SRsrc, MDT); |
5577 | |
5578 | MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp); |
5579 | if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) |
5580 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *SSamp, MDT); |
5581 | |
5582 | return CreatedBB; |
5583 | } |
5584 | |
5585 | // Legalize SI_CALL_ISEL: the call target must end up in SGPRs. |
5586 | if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) { |
5587 | MachineOperand *Dest = &MI.getOperand(0); |
5588 | if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) { |
5589 | // The indirect call target has ended up in a VGPR. Wrap the whole call |
5590 | // sequence, from the call-frame setup pseudo to the call-frame destroy |
5591 | // pseudo (plus any copies of returned values), in a waterfall loop. |
5592 | unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); |
5593 | unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); |
5594 | |
5595 | |
5596 | MachineBasicBlock &MBB = *MI.getParent(); |
5597 | MachineBasicBlock::iterator Start(&MI); |
5598 | while (Start->getOpcode() != FrameSetupOpcode) |
5599 | --Start; |
5600 | MachineBasicBlock::iterator End(&MI); |
5601 | while (End->getOpcode() != FrameDestroyOpcode) |
5602 | ++End; |
5603 | |
5604 | ++End; |
5605 | while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() && |
5606 | MI.definesRegister(End->getOperand(1).getReg())) |
5607 | ++End; |
5608 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *Dest, MDT, Start, End); |
5609 | } |
5610 | } |
5611 | |
5612 | // Legalize MUBUF/MTBUF: anything that carries an srsrc operand. |
5613 | int RsrcIdx = |
5614 | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); |
5615 | if (RsrcIdx != -1) { |
5616 | |
5617 | MachineOperand *Rsrc = &MI.getOperand(RsrcIdx); |
5618 | unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass; |
5619 | if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()), |
5620 | RI.getRegClass(RsrcRC))) { |
5621 | // The resource descriptor is already in a matching (scalar) register |
5622 | // class, so there is nothing to legalize here. |
5623 | return CreatedBB; |
5624 | } |
5625 | |
5626 | // Legalize a VGPR resource descriptor. |
5627 | // |
5628 | // If an _ADDR64 form of the instruction exists, the waterfall loop can be |
5629 | // avoided: extract the 64-bit base pointer from the VGPR descriptor, add |
5630 | // it to vaddr, and hand the sum to the ADDR64 instruction, while the |
5631 | // descriptor itself is replaced by one with a zero base and the default |
5632 | // data format. |
5633 | // |
5634 | // Otherwise the operand is legalized with a waterfall loop: the |
5635 | // instruction is wrapped in a loop that uses V_READFIRSTLANE_B32 to peel |
5636 | // off one uniform descriptor value per iteration and runs the instruction |
5637 | // only for the lanes that hold that value. |
5638 | |
5639 | MachineBasicBlock &MBB = *MI.getParent(); |
5640 | |
5641 | MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); |
5642 | if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) { |
5643 | // The instruction already has an _ADDR64 form: add the 64-bit pointer |
5644 | // extracted from the descriptor to vaddr and use the zero-based descriptor. |
5645 | Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
5646 | Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
5647 | Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); |
5648 | |
5649 | const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5650 | Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); |
5651 | Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); |
5652 | |
5653 | unsigned RsrcPtr, NewSRsrc; |
5654 | std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); |
5655 | |
5656 | |
5657 | const DebugLoc &DL = MI.getDebugLoc(); |
5658 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_CO_U32_e64), NewVAddrLo) |
5659 | .addDef(CondReg0) |
5660 | .addReg(RsrcPtr, 0, AMDGPU::sub0) |
5661 | .addReg(VAddr->getReg(), 0, AMDGPU::sub0) |
5662 | .addImm(0); |
5663 | |
5664 | |
5665 | BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi) |
5666 | .addDef(CondReg1, RegState::Dead) |
5667 | .addReg(RsrcPtr, 0, AMDGPU::sub1) |
5668 | .addReg(VAddr->getReg(), 0, AMDGPU::sub1) |
5669 | .addReg(CondReg0, RegState::Kill) |
5670 | .addImm(0); |
5671 | |
5672 | |
5673 | BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) |
5674 | .addReg(NewVAddrLo) |
5675 | .addImm(AMDGPU::sub0) |
5676 | .addReg(NewVAddrHi) |
5677 | .addImm(AMDGPU::sub1); |
5678 | |
5679 | VAddr->setReg(NewVAddr); |
5680 | Rsrc->setReg(NewSRsrc); |
5681 | } else if (!VAddr && ST.hasAddr64()) { |
5682 | // This is the _OFFSET form on a subtarget that still has ADDR64 buffer |
5683 | // instructions, so convert it to ADDR64 instead of building a waterfall loop. |
5684 | assert(ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS && |
5685 | "FIXME: Need to emit flat atomics here"); |
5686 | |
5687 | unsigned RsrcPtr, NewSRsrc; |
5688 | std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); |
5689 | |
5690 | Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); |
5691 | MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); |
5692 | MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); |
5693 | MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); |
5694 | unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); |
5695 | |
5696 | // Atomics with a return value carry an extra tied vdata_in operand, so |
5697 | // they are rebuilt through the second path below. |
5698 | MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); |
5699 | MachineInstr *Addr64; |
5700 | |
5701 | if (!VDataIn) { |
5702 | |
5703 | MachineInstrBuilder MIB = |
5704 | BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) |
5705 | .add(*VData) |
5706 | .addReg(NewVAddr) |
5707 | .addReg(NewSRsrc) |
5708 | .add(*SOffset) |
5709 | .add(*Offset); |
5710 | |
5711 | if (const MachineOperand *CPol = |
5712 | getNamedOperand(MI, AMDGPU::OpName::cpol)) { |
5713 | MIB.addImm(CPol->getImm()); |
5714 | } |
5715 | |
5716 | if (const MachineOperand *TFE = |
5717 | getNamedOperand(MI, AMDGPU::OpName::tfe)) { |
5718 | MIB.addImm(TFE->getImm()); |
5719 | } |
5720 | |
5721 | MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz)); |
5722 | |
5723 | MIB.cloneMemRefs(MI); |
5724 | Addr64 = MIB; |
5725 | } else { |
5726 | |
5727 | Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) |
5728 | .add(*VData) |
5729 | .add(*VDataIn) |
5730 | .addReg(NewVAddr) |
5731 | .addReg(NewSRsrc) |
5732 | .add(*SOffset) |
5733 | .add(*Offset) |
5734 | .addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol)) |
5735 | .cloneMemRefs(MI); |
5736 | } |
5737 | |
5738 | MI.removeFromParent(); |
5739 | |
5740 | |
5741 | BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), |
5742 | NewVAddr) |
5743 | .addReg(RsrcPtr, 0, AMDGPU::sub0) |
5744 | .addImm(AMDGPU::sub0) |
5745 | .addReg(RsrcPtr, 0, AMDGPU::sub1) |
5746 | .addImm(AMDGPU::sub1); |
5747 | } else { |
5748 | // Neither the ADDR64 trick nor a direct rewrite applies here, so fall back |
5749 | // to a waterfall loop that legalizes the resource descriptor. |
5750 | CreatedBB = loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT); |
5751 | return CreatedBB; |
5752 | } |
5753 | } |
5754 | return CreatedBB; |
5755 | } |
5756 | |
5757 | MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, |
5758 | MachineDominatorTree *MDT) const { |
5759 | SetVectorType Worklist; |
5760 | Worklist.insert(&TopInst); |
5761 | MachineBasicBlock *CreatedBB = nullptr; |
5762 | MachineBasicBlock *CreatedBBTmp = nullptr; |
5763 | |
5764 | while (!Worklist.empty()) { |
5765 | MachineInstr &Inst = *Worklist.pop_back_val(); |
5766 | MachineBasicBlock *MBB = Inst.getParent(); |
5767 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
5768 | |
5769 | unsigned Opcode = Inst.getOpcode(); |
5770 | unsigned NewOpcode = getVALUOp(Inst); |
5771 | |
5772 | |
5773 | switch (Opcode) { |
5774 | default: |
5775 | break; |
5776 | case AMDGPU::S_ADD_U64_PSEUDO: |
5777 | case AMDGPU::S_SUB_U64_PSEUDO: |
5778 | splitScalar64BitAddSub(Worklist, Inst, MDT); |
5779 | Inst.eraseFromParent(); |
5780 | continue; |
5781 | case AMDGPU::S_ADD_I32: |
5782 | case AMDGPU::S_SUB_I32: { |
5783 | |
5784 | bool Changed; |
5785 | std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT); |
5786 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
5787 | CreatedBB = CreatedBBTmp; |
5788 | if (Changed) |
5789 | continue; |
5790 | |
5791 | // Otherwise fall through to the default VALU lowering below. |
5792 | break; |
5793 | } |
5794 | case AMDGPU::S_AND_B64: |
5795 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT); |
5796 | Inst.eraseFromParent(); |
5797 | continue; |
5798 | |
5799 | case AMDGPU::S_OR_B64: |
5800 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT); |
5801 | Inst.eraseFromParent(); |
5802 | continue; |
5803 | |
5804 | case AMDGPU::S_XOR_B64: |
5805 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT); |
5806 | Inst.eraseFromParent(); |
5807 | continue; |
5808 | |
5809 | case AMDGPU::S_NAND_B64: |
5810 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT); |
5811 | Inst.eraseFromParent(); |
5812 | continue; |
5813 | |
5814 | case AMDGPU::S_NOR_B64: |
5815 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT); |
5816 | Inst.eraseFromParent(); |
5817 | continue; |
5818 | |
5819 | case AMDGPU::S_XNOR_B64: |
5820 | if (ST.hasDLInsts()) |
5821 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT); |
5822 | else |
5823 | splitScalar64BitXnor(Worklist, Inst, MDT); |
5824 | Inst.eraseFromParent(); |
5825 | continue; |
5826 | |
5827 | case AMDGPU::S_ANDN2_B64: |
5828 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT); |
5829 | Inst.eraseFromParent(); |
5830 | continue; |
5831 | |
5832 | case AMDGPU::S_ORN2_B64: |
5833 | splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT); |
5834 | Inst.eraseFromParent(); |
5835 | continue; |
5836 | |
5837 | case AMDGPU::S_BREV_B64: |
5838 | splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true); |
5839 | Inst.eraseFromParent(); |
5840 | continue; |
5841 | |
5842 | case AMDGPU::S_NOT_B64: |
5843 | splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); |
5844 | Inst.eraseFromParent(); |
5845 | continue; |
5846 | |
5847 | case AMDGPU::S_BCNT1_I32_B64: |
5848 | splitScalar64BitBCNT(Worklist, Inst); |
5849 | Inst.eraseFromParent(); |
5850 | continue; |
5851 | |
5852 | case AMDGPU::S_BFE_I64: |
5853 | splitScalar64BitBFE(Worklist, Inst); |
5854 | Inst.eraseFromParent(); |
5855 | continue; |
5856 | |
5857 | case AMDGPU::S_LSHL_B32: |
5858 | if (ST.hasOnlyRevVALUShifts()) { |
5859 | NewOpcode = AMDGPU::V_LSHLREV_B32_e64; |
5860 | swapOperands(Inst); |
5861 | } |
5862 | break; |
5863 | case AMDGPU::S_ASHR_I32: |
5864 | if (ST.hasOnlyRevVALUShifts()) { |
5865 | NewOpcode = AMDGPU::V_ASHRREV_I32_e64; |
5866 | swapOperands(Inst); |
5867 | } |
5868 | break; |
5869 | case AMDGPU::S_LSHR_B32: |
5870 | if (ST.hasOnlyRevVALUShifts()) { |
5871 | NewOpcode = AMDGPU::V_LSHRREV_B32_e64; |
5872 | swapOperands(Inst); |
5873 | } |
5874 | break; |
5875 | case AMDGPU::S_LSHL_B64: |
5876 | if (ST.hasOnlyRevVALUShifts()) { |
5877 | NewOpcode = AMDGPU::V_LSHLREV_B64_e64; |
5878 | swapOperands(Inst); |
5879 | } |
5880 | break; |
5881 | case AMDGPU::S_ASHR_I64: |
5882 | if (ST.hasOnlyRevVALUShifts()) { |
5883 | NewOpcode = AMDGPU::V_ASHRREV_I64_e64; |
5884 | swapOperands(Inst); |
5885 | } |
5886 | break; |
5887 | case AMDGPU::S_LSHR_B64: |
5888 | if (ST.hasOnlyRevVALUShifts()) { |
5889 | NewOpcode = AMDGPU::V_LSHRREV_B64_e64; |
5890 | swapOperands(Inst); |
5891 | } |
5892 | break; |
5893 | |
5894 | case AMDGPU::S_ABS_I32: |
5895 | lowerScalarAbs(Worklist, Inst); |
5896 | Inst.eraseFromParent(); |
5897 | continue; |
5898 | |
5899 | case AMDGPU::S_CBRANCH_SCC0: |
5900 | case AMDGPU::S_CBRANCH_SCC1: |
5901 | // Clear the VCC bits of inactive lanes before the branch reads VCC. |
5902 | if (ST.isWave32()) |
5903 | BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B32), |
5904 | AMDGPU::VCC_LO) |
5905 | .addReg(AMDGPU::EXEC_LO) |
5906 | .addReg(AMDGPU::VCC_LO); |
5907 | else |
5908 | BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64), |
5909 | AMDGPU::VCC) |
5910 | .addReg(AMDGPU::EXEC) |
5911 | .addReg(AMDGPU::VCC); |
5912 | break; |
5913 | |
5914 | case AMDGPU::S_BFE_U64: |
5915 | case AMDGPU::S_BFM_B64: |
5916 | llvm_unreachable("Moving this op to VALU not implemented"); |
5917 | |
5918 | case AMDGPU::S_PACK_LL_B32_B16: |
5919 | case AMDGPU::S_PACK_LH_B32_B16: |
5920 | case AMDGPU::S_PACK_HH_B32_B16: |
5921 | movePackToVALU(Worklist, MRI, Inst); |
5922 | Inst.eraseFromParent(); |
5923 | continue; |
5924 | |
5925 | case AMDGPU::S_XNOR_B32: |
5926 | lowerScalarXnor(Worklist, Inst); |
5927 | Inst.eraseFromParent(); |
5928 | continue; |
5929 | |
5930 | case AMDGPU::S_NAND_B32: |
5931 | splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32); |
5932 | Inst.eraseFromParent(); |
5933 | continue; |
5934 | |
5935 | case AMDGPU::S_NOR_B32: |
5936 | splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32); |
5937 | Inst.eraseFromParent(); |
5938 | continue; |
5939 | |
5940 | case AMDGPU::S_ANDN2_B32: |
5941 | splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32); |
5942 | Inst.eraseFromParent(); |
5943 | continue; |
5944 | |
5945 | case AMDGPU::S_ORN2_B32: |
5946 | splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32); |
5947 | Inst.eraseFromParent(); |
5948 | continue; |
5949 | |
5950 | // S_ADD_CO_PSEUDO / S_SUB_CO_PSEUDO (and the UADDO/USUBO pseudos below) |
5951 | // are only selected for uniform values; when one of them has to move to |
5952 | // the VALU it is rebuilt here as the corresponding carry-in/carry-out VALU |
5953 | // instruction with a properly classed carry register. |
5954 | case AMDGPU::S_ADD_CO_PSEUDO: |
5955 | case AMDGPU::S_SUB_CO_PSEUDO: { |
5956 | unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) |
5957 | ? AMDGPU::V_ADDC_U32_e64 |
5958 | : AMDGPU::V_SUBB_U32_e64; |
5959 | const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); |
5960 | |
5961 | Register CarryInReg = Inst.getOperand(4).getReg(); |
5962 | if (!MRI.constrainRegClass(CarryInReg, CarryRC)) { |
5963 | Register NewCarryReg = MRI.createVirtualRegister(CarryRC); |
5964 | BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg) |
5965 | .addReg(CarryInReg); |
5966 | } |
5967 | |
5968 | Register CarryOutReg = Inst.getOperand(1).getReg(); |
5969 | |
5970 | Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass( |
5971 | MRI.getRegClass(Inst.getOperand(0).getReg()))); |
5972 | MachineInstr *CarryOp = |
5973 | BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg) |
5974 | .addReg(CarryOutReg, RegState::Define) |
5975 | .add(Inst.getOperand(2)) |
5976 | .add(Inst.getOperand(3)) |
5977 | .addReg(CarryInReg) |
5978 | .addImm(0); |
5979 | CreatedBBTmp = legalizeOperands(*CarryOp); |
5980 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
5981 | CreatedBB = CreatedBBTmp; |
5982 | MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg); |
5983 | addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist); |
5984 | Inst.eraseFromParent(); |
5985 | } |
5986 | continue; |
5987 | case AMDGPU::S_UADDO_PSEUDO: |
5988 | case AMDGPU::S_USUBO_PSEUDO: { |
5989 | const DebugLoc &DL = Inst.getDebugLoc(); |
5990 | MachineOperand &Dest0 = Inst.getOperand(0); |
5991 | MachineOperand &Dest1 = Inst.getOperand(1); |
5992 | MachineOperand &Src0 = Inst.getOperand(2); |
5993 | MachineOperand &Src1 = Inst.getOperand(3); |
5994 | |
5995 | unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO) |
5996 | ? AMDGPU::V_ADD_CO_U32_e64 |
5997 | : AMDGPU::V_SUB_CO_U32_e64; |
5998 | const TargetRegisterClass *NewRC = |
5999 | RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg())); |
6000 | Register DestReg = MRI.createVirtualRegister(NewRC); |
6001 | MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg) |
6002 | .addReg(Dest1.getReg(), RegState::Define) |
6003 | .add(Src0) |
6004 | .add(Src1) |
6005 | .addImm(0); |
6006 | |
6007 | CreatedBBTmp = legalizeOperands(*NewInstr, MDT); |
6008 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
6009 | CreatedBB = CreatedBBTmp; |
6010 | |
6011 | MRI.replaceRegWith(Dest0.getReg(), DestReg); |
6012 | addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI, |
6013 | Worklist); |
6014 | Inst.eraseFromParent(); |
6015 | } |
6016 | continue; |
6017 | |
6018 | case AMDGPU::S_CSELECT_B32: |
6019 | case AMDGPU::S_CSELECT_B64: |
6020 | lowerSelect(Worklist, Inst, MDT); |
6021 | Inst.eraseFromParent(); |
6022 | continue; |
6023 | } |
6024 | |
6025 | if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { |
6026 | // There is no VALU equivalent for this opcode; the best we can do is |
6027 | // legalize its operands so any VGPR inputs are handled. |
6028 | CreatedBBTmp = legalizeOperands(Inst, MDT); |
6029 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
6030 | CreatedBB = CreatedBBTmp; |
6031 | continue; |
6032 | } |
6033 | |
6034 | |
6035 | const MCInstrDesc &NewDesc = get(NewOpcode); |
6036 | Inst.setDesc(NewDesc); |
6037 | |
6038 | // Remove any references to SCC: vector instructions cannot read it, and |
6039 | // the implicit VCC operands added below replace any SCC definition. Users |
6040 | // of a live SCC def are queued so they get moved to the VALU as well. |
6041 | for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { |
6042 | MachineOperand &Op = Inst.getOperand(i); |
6043 | if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { |
6044 | |
6045 | if (Op.isDef() && !Op.isDead()) |
6046 | addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); |
6047 | if (Op.isUse()) |
6048 | addSCCDefsToVALUWorklist(Op, Worklist); |
6049 | Inst.RemoveOperand(i); |
6050 | } |
6051 | } |
6052 | |
6053 | if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { |
6054 | // S_SEXT_I32_I8/I16 become a BFE, which needs explicit offset and width |
6055 | // operands appended. |
6056 | unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; |
6057 | Inst.addOperand(MachineOperand::CreateImm(0)); |
6058 | Inst.addOperand(MachineOperand::CreateImm(Size)); |
6059 | |
6060 | } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { |
6061 | // The VALU bit-count adds its second operand to the result, so append an |
6062 | // explicit 0. |
6063 | Inst.addOperand(MachineOperand::CreateImm(0)); |
6064 | } |
6065 | |
6066 | Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); |
6067 | fixImplicitOperands(Inst); |
6068 | |
6069 | if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { |
6070 | const MachineOperand &OffsetWidthOp = Inst.getOperand(2); |
6071 | |
6072 | |
6073 | assert(OffsetWidthOp.isImm() && |
6074 | "Scalar BFE is only implemented for constant width and offset"); |
6075 | uint32_t Imm = OffsetWidthOp.getImm(); |
6076 | |
6077 | uint32_t Offset = Imm & 0x3f; |
6078 | uint32_t BitWidth = (Imm & 0x7f0000) >> 16; |
6079 | Inst.RemoveOperand(2); |
6080 | Inst.addOperand(MachineOperand::CreateImm(Offset)); |
6081 | Inst.addOperand(MachineOperand::CreateImm(BitWidth)); |
6082 | } |
6083 | |
6084 | bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); |
6085 | unsigned NewDstReg = AMDGPU::NoRegister; |
6086 | if (HasDst) { |
6087 | Register DstReg = Inst.getOperand(0).getReg(); |
6088 | if (DstReg.isPhysical()) |
6089 | continue; |
6090 | |
6091 | |
6092 | const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); |
6093 | if (!NewDstRC) |
6094 | continue; |
6095 | |
6096 | if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && |
6097 | NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { |
6098 | |
6099 | // Instead of creating a copy where the source and destination have the |
6100 | // same register class, simply replace all uses of the destination with the |
6101 | // source. Such same-class copies confuse the heuristics MachineSink uses |
6102 | // when deciding whether to split a critical edge, so they are avoided |
6103 | // entirely and the instruction is turned into an IMPLICIT_DEF below. |
6104 | addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); |
6105 | MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); |
6106 | MRI.clearKillFlags(Inst.getOperand(1).getReg()); |
6107 | Inst.getOperand(0).setReg(DstReg); |
6108 | |
6109 | // Make sure a dead VGPR->SGPR copy is not left behind. It would normally |
6110 | // be deleted later, but at -O0 it would survive as a suspicious-looking |
6111 | // illegal copy of an undef register. |
6112 | for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) |
6113 | Inst.RemoveOperand(I); |
6114 | Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); |
6115 | continue; |
6116 | } |
6117 | |
6118 | NewDstReg = MRI.createVirtualRegister(NewDstRC); |
6119 | MRI.replaceRegWith(DstReg, NewDstReg); |
6120 | } |
6121 | |
6122 | |
6123 | CreatedBBTmp = legalizeOperands(Inst, MDT); |
6124 | if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) |
6125 | CreatedBB = CreatedBBTmp; |
6126 | |
6127 | if (HasDst) |
6128 | addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); |
6129 | } |
6130 | return CreatedBB; |
6131 | } |
6132 | |
6133 | |
6134 | std::pair<bool, MachineBasicBlock *> |
6135 | SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, |
6136 | MachineDominatorTree *MDT) const { |
6137 | if (ST.hasAddNoCarry()) { |
6138 | // On targets with carry-less VALU adds the scalar add/sub can be replaced |
6139 | // directly with V_ADD_U32/V_SUB_U32. SCC is assumed to have no users here, |
6140 | // because this path is not selected when the carry output is actually |
6141 | // needed. |
6142 | MachineBasicBlock &MBB = *Inst.getParent(); |
6143 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6144 | |
6145 | Register OldDstReg = Inst.getOperand(0).getReg(); |
6146 | Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
6147 | |
6148 | unsigned Opc = Inst.getOpcode(); |
6149 | assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); |
6150 | |
6151 | unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? |
6152 | AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; |
6153 | |
6154 | assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); |
6155 | Inst.RemoveOperand(3); |
6156 | |
6157 | Inst.setDesc(get(NewOpc)); |
6158 | Inst.addOperand(MachineOperand::CreateImm(0)); |
6159 | Inst.addImplicitDefUseOperands(*MBB.getParent()); |
6160 | MRI.replaceRegWith(OldDstReg, ResultReg); |
6161 | MachineBasicBlock *NewBB = legalizeOperands(Inst, MDT); |
6162 | |
6163 | addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); |
6164 | return std::make_pair(true, NewBB); |
6165 | } |
6166 | |
6167 | return std::make_pair(false, nullptr); |
6168 | } |
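// Illustrative before/after MIR for the no-carry path above (register names
// made up):
//
//   %r:sgpr_32 = S_ADD_I32 %a, %b, implicit-def dead $scc
//     becomes
//   %r2:vgpr_32 = V_ADD_U32_e64 %a, %b, 0, implicit $exec
//
// The trailing 0 is the clamp operand appended via CreateImm(0), and the
// implicit $exec use comes from addImplicitDefUseOperands.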
6169 | |
6170 | void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, |
6171 | MachineDominatorTree *MDT) const { |
6172 | |
6173 | MachineBasicBlock &MBB = *Inst.getParent(); |
6174 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
6175 | MachineBasicBlock::iterator MII = Inst; |
6176 | DebugLoc DL = Inst.getDebugLoc(); |
6177 | |
6178 | MachineOperand &Dest = Inst.getOperand(0); |
6179 | MachineOperand &Src0 = Inst.getOperand(1); |
6180 | MachineOperand &Src1 = Inst.getOperand(2); |
6181 | MachineOperand &Cond = Inst.getOperand(3); |
6182 | |
6183 | Register SCCSource = Cond.getReg(); |
6184 | |
6185 | if (!Cond.isUndef()) { |
6186 | for (MachineInstr &CandI : |
6187 | make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)), |
6188 | Inst.getParent()->rend())) { |
6189 | if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != |
6190 | -1) { |
6191 | if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) { |
6192 | SCCSource = CandI.getOperand(1).getReg(); |
6193 | } |
6194 | break; |
6195 | } |
6196 | } |
6197 | } |
6198 | |
6199 | // If the condition is effectively not SCC itself (it was copied into SCC |
6200 | // from another register) and the select picks between -1 and 0, the result |
6201 | // is just that source register, so a plain copy replaces the whole select |
6202 | // and no V_CNDMASK is needed. |
6203 | if ((SCCSource != AMDGPU::SCC) && Src0.isImm() && (Src0.getImm() == -1) && |
6204 | Src1.isImm() && (Src1.getImm() == 0)) { |
6205 | MRI.replaceRegWith(Dest.getReg(), SCCSource); |
6206 | return; |
6207 | } |
6208 | |
6209 | const TargetRegisterClass *TC = ST.getWavefrontSize() == 64 |
6210 | ? &AMDGPU::SReg_64_XEXECRegClass |
6211 | : &AMDGPU::SReg_32_XM0_XEXECRegClass; |
6212 | Register CopySCC = MRI.createVirtualRegister(TC); |
6213 | |
6214 | if (SCCSource == AMDGPU::SCC) { |
6215 | |
6216 | |