File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp |
Warning: | line 736, column 15 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AMDGPULibCalls.cpp -------------------------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | /// \file | |||
10 | /// This file does AMD library function optimizations. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "AMDGPU.h" | |||
15 | #include "AMDGPULibFunc.h" | |||
16 | #include "GCNSubtarget.h" | |||
17 | #include "llvm/Analysis/AliasAnalysis.h" | |||
18 | #include "llvm/Analysis/Loads.h" | |||
19 | #include "llvm/IR/IntrinsicsAMDGPU.h" | |||
20 | #include "llvm/IR/IRBuilder.h" | |||
21 | #include "llvm/InitializePasses.h" | |||
22 | #include "llvm/Target/TargetMachine.h" | |||
23 | ||||
// Debug category for this file's LLVM_DEBUG output.
#define DEBUG_TYPE "amdgpu-simplifylib"
25 | ||||
26 | using namespace llvm; | |||
27 | ||||
28 | static cl::opt<bool> EnablePreLink("amdgpu-prelink", | |||
29 | cl::desc("Enable pre-link mode optimizations"), | |||
30 | cl::init(false), | |||
31 | cl::Hidden); | |||
32 | ||||
33 | static cl::list<std::string> UseNative("amdgpu-use-native", | |||
34 | cl::desc("Comma separated list of functions to replace with native, or all"), | |||
35 | cl::CommaSeparated, cl::ValueOptional, | |||
36 | cl::Hidden); | |||
37 | ||||
// Short aliases for the llvm::numbers constants used by the folding code.
#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2
42 | ||||
43 | namespace llvm { | |||
44 | ||||
45 | class AMDGPULibCalls { | |||
46 | private: | |||
47 | ||||
48 | typedef llvm::AMDGPULibFunc FuncInfo; | |||
49 | ||||
50 | const TargetMachine *TM; | |||
51 | ||||
52 | // -fuse-native. | |||
53 | bool AllNative = false; | |||
54 | ||||
55 | bool useNativeFunc(const StringRef F) const; | |||
56 | ||||
57 | // Return a pointer (pointer expr) to the function if function defintion with | |||
58 | // "FuncName" exists. It may create a new function prototype in pre-link mode. | |||
59 | FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); | |||
60 | ||||
61 | // Replace a normal function with its native version. | |||
62 | bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo); | |||
63 | ||||
64 | bool parseFunctionName(const StringRef& FMangledName, | |||
65 | FuncInfo *FInfo=nullptr /*out*/); | |||
66 | ||||
67 | bool TDOFold(CallInst *CI, const FuncInfo &FInfo); | |||
68 | ||||
69 | /* Specialized optimizations */ | |||
70 | ||||
71 | // recip (half or native) | |||
72 | bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
73 | ||||
74 | // divide (half or native) | |||
75 | bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
76 | ||||
77 | // pow/powr/pown | |||
78 | bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
79 | ||||
80 | // rootn | |||
81 | bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
82 | ||||
83 | // fma/mad | |||
84 | bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
85 | ||||
86 | // -fuse-native for sincos | |||
87 | bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); | |||
88 | ||||
89 | // evaluate calls if calls' arguments are constants. | |||
90 | bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0, | |||
91 | double& Res1, Constant *copr0, Constant *copr1, Constant *copr2); | |||
92 | bool evaluateCall(CallInst *aCI, FuncInfo &FInfo); | |||
93 | ||||
94 | // exp | |||
95 | bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
96 | ||||
97 | // exp2 | |||
98 | bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
99 | ||||
100 | // exp10 | |||
101 | bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
102 | ||||
103 | // log | |||
104 | bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
105 | ||||
106 | // log2 | |||
107 | bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
108 | ||||
109 | // log10 | |||
110 | bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
111 | ||||
112 | // sqrt | |||
113 | bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
114 | ||||
115 | // sin/cos | |||
116 | bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA); | |||
117 | ||||
118 | // __read_pipe/__write_pipe | |||
119 | bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo); | |||
120 | ||||
121 | // llvm.amdgcn.wavefrontsize | |||
122 | bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B); | |||
123 | ||||
124 | // Get insertion point at entry. | |||
125 | BasicBlock::iterator getEntryIns(CallInst * UI); | |||
126 | // Insert an Alloc instruction. | |||
127 | AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix); | |||
128 | // Get a scalar native builtin signle argument FP function | |||
129 | FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); | |||
130 | ||||
131 | protected: | |||
132 | CallInst *CI; | |||
133 | ||||
134 | bool isUnsafeMath(const CallInst *CI) const; | |||
135 | ||||
136 | void replaceCall(Value *With) { | |||
137 | CI->replaceAllUsesWith(With); | |||
138 | CI->eraseFromParent(); | |||
139 | } | |||
140 | ||||
141 | public: | |||
142 | AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {} | |||
143 | ||||
144 | bool fold(CallInst *CI, AliasAnalysis *AA = nullptr); | |||
145 | ||||
146 | void initNativeFuncs(); | |||
147 | ||||
148 | // Replace a normal math function call with that native version | |||
149 | bool useNative(CallInst *CI); | |||
150 | }; | |||
151 | ||||
152 | } // end llvm namespace | |||
153 | ||||
154 | namespace { | |||
155 | ||||
156 | class AMDGPUSimplifyLibCalls : public FunctionPass { | |||
157 | ||||
158 | AMDGPULibCalls Simplifier; | |||
159 | ||||
160 | public: | |||
161 | static char ID; // Pass identification | |||
162 | ||||
163 | AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr) | |||
164 | : FunctionPass(ID), Simplifier(TM) { | |||
165 | initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); | |||
166 | } | |||
167 | ||||
168 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
169 | AU.addRequired<AAResultsWrapperPass>(); | |||
170 | } | |||
171 | ||||
172 | bool runOnFunction(Function &M) override; | |||
173 | }; | |||
174 | ||||
175 | class AMDGPUUseNativeCalls : public FunctionPass { | |||
176 | ||||
177 | AMDGPULibCalls Simplifier; | |||
178 | ||||
179 | public: | |||
180 | static char ID; // Pass identification | |||
181 | ||||
182 | AMDGPUUseNativeCalls() : FunctionPass(ID) { | |||
183 | initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry()); | |||
184 | Simplifier.initNativeFuncs(); | |||
185 | } | |||
186 | ||||
187 | bool runOnFunction(Function &F) override; | |||
188 | }; | |||
189 | ||||
190 | } // end anonymous namespace. | |||
191 | ||||
192 | char AMDGPUSimplifyLibCalls::ID = 0; | |||
193 | char AMDGPUUseNativeCalls::ID = 0; | |||
194 | ||||
195 | INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",static void *initializeAMDGPUSimplifyLibCallsPassOnce(PassRegistry &Registry) { | |||
196 | "Simplify well-known AMD library calls", false, false)static void *initializeAMDGPUSimplifyLibCallsPassOnce(PassRegistry &Registry) { | |||
197 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | |||
198 | INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",PassInfo *PI = new PassInfo( "Simplify well-known AMD library calls" , "amdgpu-simplifylib", &AMDGPUSimplifyLibCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUSimplifyLibCalls>) , false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUSimplifyLibCallsPassFlag ; void llvm::initializeAMDGPUSimplifyLibCallsPass(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUSimplifyLibCallsPassFlag , initializeAMDGPUSimplifyLibCallsPassOnce, std::ref(Registry )); } | |||
199 | "Simplify well-known AMD library calls", false, false)PassInfo *PI = new PassInfo( "Simplify well-known AMD library calls" , "amdgpu-simplifylib", &AMDGPUSimplifyLibCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUSimplifyLibCalls>) , false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUSimplifyLibCallsPassFlag ; void llvm::initializeAMDGPUSimplifyLibCallsPass(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUSimplifyLibCallsPassFlag , initializeAMDGPUSimplifyLibCallsPassOnce, std::ref(Registry )); } | |||
200 | ||||
201 | INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions." , "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce , std::ref(Registry)); } | |||
202 | "Replace builtin math calls with that native versions.",static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions." , "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce , std::ref(Registry)); } | |||
203 | false, false)static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions." , "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce , std::ref(Registry)); } | |||
204 | ||||
205 | template <typename IRB> | |||
206 | static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, | |||
207 | const Twine &Name = "") { | |||
208 | CallInst *R = B.CreateCall(Callee, Arg, Name); | |||
209 | if (Function *F = dyn_cast<Function>(Callee.getCallee())) | |||
210 | R->setCallingConv(F->getCallingConv()); | |||
211 | return R; | |||
212 | } | |||
213 | ||||
214 | template <typename IRB> | |||
215 | static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, | |||
216 | Value *Arg2, const Twine &Name = "") { | |||
217 | CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); | |||
218 | if (Function *F = dyn_cast<Function>(Callee.getCallee())) | |||
219 | R->setCallingConv(F->getCallingConv()); | |||
220 | return R; | |||
221 | } | |||
222 | ||||
223 | // Data structures for table-driven optimizations. | |||
224 | // FuncTbl works for both f32 and f64 functions with 1 input argument | |||
225 | ||||
// One row of a folding table: a call whose constant argument equals `input`
// is replaced by the constant `result`.
struct TableEntry {
  double result; // value substituted for the call
  double input;  // argument value this row matches exactly
};
230 | ||||
231 | /* a list of {result, input} */ | |||
232 | static const TableEntry tbl_acos[] = { | |||
233 | {MATH_PInumbers::pi / 2.0, 0.0}, | |||
234 | {MATH_PInumbers::pi / 2.0, -0.0}, | |||
235 | {0.0, 1.0}, | |||
236 | {MATH_PInumbers::pi, -1.0} | |||
237 | }; | |||
238 | static const TableEntry tbl_acosh[] = { | |||
239 | {0.0, 1.0} | |||
240 | }; | |||
241 | static const TableEntry tbl_acospi[] = { | |||
242 | {0.5, 0.0}, | |||
243 | {0.5, -0.0}, | |||
244 | {0.0, 1.0}, | |||
245 | {1.0, -1.0} | |||
246 | }; | |||
247 | static const TableEntry tbl_asin[] = { | |||
248 | {0.0, 0.0}, | |||
249 | {-0.0, -0.0}, | |||
250 | {MATH_PInumbers::pi / 2.0, 1.0}, | |||
251 | {-MATH_PInumbers::pi / 2.0, -1.0} | |||
252 | }; | |||
253 | static const TableEntry tbl_asinh[] = { | |||
254 | {0.0, 0.0}, | |||
255 | {-0.0, -0.0} | |||
256 | }; | |||
257 | static const TableEntry tbl_asinpi[] = { | |||
258 | {0.0, 0.0}, | |||
259 | {-0.0, -0.0}, | |||
260 | {0.5, 1.0}, | |||
261 | {-0.5, -1.0} | |||
262 | }; | |||
263 | static const TableEntry tbl_atan[] = { | |||
264 | {0.0, 0.0}, | |||
265 | {-0.0, -0.0}, | |||
266 | {MATH_PInumbers::pi / 4.0, 1.0}, | |||
267 | {-MATH_PInumbers::pi / 4.0, -1.0} | |||
268 | }; | |||
269 | static const TableEntry tbl_atanh[] = { | |||
270 | {0.0, 0.0}, | |||
271 | {-0.0, -0.0} | |||
272 | }; | |||
273 | static const TableEntry tbl_atanpi[] = { | |||
274 | {0.0, 0.0}, | |||
275 | {-0.0, -0.0}, | |||
276 | {0.25, 1.0}, | |||
277 | {-0.25, -1.0} | |||
278 | }; | |||
279 | static const TableEntry tbl_cbrt[] = { | |||
280 | {0.0, 0.0}, | |||
281 | {-0.0, -0.0}, | |||
282 | {1.0, 1.0}, | |||
283 | {-1.0, -1.0}, | |||
284 | }; | |||
285 | static const TableEntry tbl_cos[] = { | |||
286 | {1.0, 0.0}, | |||
287 | {1.0, -0.0} | |||
288 | }; | |||
289 | static const TableEntry tbl_cosh[] = { | |||
290 | {1.0, 0.0}, | |||
291 | {1.0, -0.0} | |||
292 | }; | |||
293 | static const TableEntry tbl_cospi[] = { | |||
294 | {1.0, 0.0}, | |||
295 | {1.0, -0.0} | |||
296 | }; | |||
297 | static const TableEntry tbl_erfc[] = { | |||
298 | {1.0, 0.0}, | |||
299 | {1.0, -0.0} | |||
300 | }; | |||
301 | static const TableEntry tbl_erf[] = { | |||
302 | {0.0, 0.0}, | |||
303 | {-0.0, -0.0} | |||
304 | }; | |||
305 | static const TableEntry tbl_exp[] = { | |||
306 | {1.0, 0.0}, | |||
307 | {1.0, -0.0}, | |||
308 | {MATH_Enumbers::e, 1.0} | |||
309 | }; | |||
310 | static const TableEntry tbl_exp2[] = { | |||
311 | {1.0, 0.0}, | |||
312 | {1.0, -0.0}, | |||
313 | {2.0, 1.0} | |||
314 | }; | |||
315 | static const TableEntry tbl_exp10[] = { | |||
316 | {1.0, 0.0}, | |||
317 | {1.0, -0.0}, | |||
318 | {10.0, 1.0} | |||
319 | }; | |||
320 | static const TableEntry tbl_expm1[] = { | |||
321 | {0.0, 0.0}, | |||
322 | {-0.0, -0.0} | |||
323 | }; | |||
324 | static const TableEntry tbl_log[] = { | |||
325 | {0.0, 1.0}, | |||
326 | {1.0, MATH_Enumbers::e} | |||
327 | }; | |||
328 | static const TableEntry tbl_log2[] = { | |||
329 | {0.0, 1.0}, | |||
330 | {1.0, 2.0} | |||
331 | }; | |||
332 | static const TableEntry tbl_log10[] = { | |||
333 | {0.0, 1.0}, | |||
334 | {1.0, 10.0} | |||
335 | }; | |||
336 | static const TableEntry tbl_rsqrt[] = { | |||
337 | {1.0, 1.0}, | |||
338 | {MATH_SQRT1_2numbers::inv_sqrt2, 2.0} | |||
339 | }; | |||
340 | static const TableEntry tbl_sin[] = { | |||
341 | {0.0, 0.0}, | |||
342 | {-0.0, -0.0} | |||
343 | }; | |||
344 | static const TableEntry tbl_sinh[] = { | |||
345 | {0.0, 0.0}, | |||
346 | {-0.0, -0.0} | |||
347 | }; | |||
348 | static const TableEntry tbl_sinpi[] = { | |||
349 | {0.0, 0.0}, | |||
350 | {-0.0, -0.0} | |||
351 | }; | |||
352 | static const TableEntry tbl_sqrt[] = { | |||
353 | {0.0, 0.0}, | |||
354 | {1.0, 1.0}, | |||
355 | {MATH_SQRT2numbers::sqrt2, 2.0} | |||
356 | }; | |||
357 | static const TableEntry tbl_tan[] = { | |||
358 | {0.0, 0.0}, | |||
359 | {-0.0, -0.0} | |||
360 | }; | |||
361 | static const TableEntry tbl_tanh[] = { | |||
362 | {0.0, 0.0}, | |||
363 | {-0.0, -0.0} | |||
364 | }; | |||
365 | static const TableEntry tbl_tanpi[] = { | |||
366 | {0.0, 0.0}, | |||
367 | {-0.0, -0.0} | |||
368 | }; | |||
369 | static const TableEntry tbl_tgamma[] = { | |||
370 | {1.0, 1.0}, | |||
371 | {1.0, 2.0}, | |||
372 | {2.0, 3.0}, | |||
373 | {6.0, 4.0} | |||
374 | }; | |||
375 | ||||
376 | static bool HasNative(AMDGPULibFunc::EFuncId id) { | |||
377 | switch(id) { | |||
378 | case AMDGPULibFunc::EI_DIVIDE: | |||
379 | case AMDGPULibFunc::EI_COS: | |||
380 | case AMDGPULibFunc::EI_EXP: | |||
381 | case AMDGPULibFunc::EI_EXP2: | |||
382 | case AMDGPULibFunc::EI_EXP10: | |||
383 | case AMDGPULibFunc::EI_LOG: | |||
384 | case AMDGPULibFunc::EI_LOG2: | |||
385 | case AMDGPULibFunc::EI_LOG10: | |||
386 | case AMDGPULibFunc::EI_POWR: | |||
387 | case AMDGPULibFunc::EI_RECIP: | |||
388 | case AMDGPULibFunc::EI_RSQRT: | |||
389 | case AMDGPULibFunc::EI_SIN: | |||
390 | case AMDGPULibFunc::EI_SINCOS: | |||
391 | case AMDGPULibFunc::EI_SQRT: | |||
392 | case AMDGPULibFunc::EI_TAN: | |||
393 | return true; | |||
394 | default:; | |||
395 | } | |||
396 | return false; | |||
397 | } | |||
398 | ||||
399 | struct TableRef { | |||
400 | size_t size; | |||
401 | const TableEntry *table; // variable size: from 0 to (size - 1) | |||
402 | ||||
403 | TableRef() : size(0), table(nullptr) {} | |||
404 | ||||
405 | template <size_t N> | |||
406 | TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {} | |||
407 | }; | |||
408 | ||||
409 | static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { | |||
410 | switch(id) { | |||
411 | case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos); | |||
412 | case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh); | |||
413 | case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi); | |||
414 | case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin); | |||
415 | case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh); | |||
416 | case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi); | |||
417 | case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan); | |||
418 | case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh); | |||
419 | case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi); | |||
420 | case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt); | |||
421 | case AMDGPULibFunc::EI_NCOS: | |||
422 | case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos); | |||
423 | case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh); | |||
424 | case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi); | |||
425 | case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc); | |||
426 | case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf); | |||
427 | case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp); | |||
428 | case AMDGPULibFunc::EI_NEXP2: | |||
429 | case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2); | |||
430 | case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10); | |||
431 | case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1); | |||
432 | case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log); | |||
433 | case AMDGPULibFunc::EI_NLOG2: | |||
434 | case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2); | |||
435 | case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10); | |||
436 | case AMDGPULibFunc::EI_NRSQRT: | |||
437 | case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt); | |||
438 | case AMDGPULibFunc::EI_NSIN: | |||
439 | case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin); | |||
440 | case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh); | |||
441 | case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi); | |||
442 | case AMDGPULibFunc::EI_NSQRT: | |||
443 | case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt); | |||
444 | case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan); | |||
445 | case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh); | |||
446 | case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi); | |||
447 | case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma); | |||
448 | default:; | |||
449 | } | |||
450 | return TableRef(); | |||
451 | } | |||
452 | ||||
453 | static inline int getVecSize(const AMDGPULibFunc& FInfo) { | |||
454 | return FInfo.getLeads()[0].VectorSize; | |||
455 | } | |||
456 | ||||
457 | static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { | |||
458 | return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; | |||
459 | } | |||
460 | ||||
461 | FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { | |||
462 | // If we are doing PreLinkOpt, the function is external. So it is safe to | |||
463 | // use getOrInsertFunction() at this stage. | |||
464 | ||||
465 | return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) | |||
466 | : AMDGPULibFunc::getFunction(M, fInfo); | |||
467 | } | |||
468 | ||||
469 | bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName, | |||
470 | FuncInfo *FInfo) { | |||
471 | return AMDGPULibFunc::parse(FMangledName, *FInfo); | |||
472 | } | |||
473 | ||||
474 | bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const { | |||
475 | if (auto Op = dyn_cast<FPMathOperator>(CI)) | |||
476 | if (Op->isFast()) | |||
477 | return true; | |||
478 | const Function *F = CI->getParent()->getParent(); | |||
479 | Attribute Attr = F->getFnAttribute("unsafe-fp-math"); | |||
480 | return Attr.getValueAsBool(); | |||
481 | } | |||
482 | ||||
483 | bool AMDGPULibCalls::useNativeFunc(const StringRef F) const { | |||
484 | return AllNative || llvm::is_contained(UseNative, F); | |||
485 | } | |||
486 | ||||
487 | void AMDGPULibCalls::initNativeFuncs() { | |||
488 | AllNative = useNativeFunc("all") || | |||
489 | (UseNative.getNumOccurrences() && UseNative.size() == 1 && | |||
490 | UseNative.begin()->empty()); | |||
491 | } | |||
492 | ||||
493 | bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { | |||
494 | bool native_sin = useNativeFunc("sin"); | |||
495 | bool native_cos = useNativeFunc("cos"); | |||
496 | ||||
497 | if (native_sin && native_cos) { | |||
498 | Module *M = aCI->getModule(); | |||
499 | Value *opr0 = aCI->getArgOperand(0); | |||
500 | ||||
501 | AMDGPULibFunc nf; | |||
502 | nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; | |||
503 | nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; | |||
504 | ||||
505 | nf.setPrefix(AMDGPULibFunc::NATIVE); | |||
506 | nf.setId(AMDGPULibFunc::EI_SIN); | |||
507 | FunctionCallee sinExpr = getFunction(M, nf); | |||
508 | ||||
509 | nf.setPrefix(AMDGPULibFunc::NATIVE); | |||
510 | nf.setId(AMDGPULibFunc::EI_COS); | |||
511 | FunctionCallee cosExpr = getFunction(M, nf); | |||
512 | if (sinExpr && cosExpr) { | |||
513 | Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI); | |||
514 | Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI); | |||
515 | new StoreInst(cosval, aCI->getArgOperand(1), aCI); | |||
516 | ||||
517 | DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCIdo { } while (false) | |||
518 | << " with native version of sin/cos")do { } while (false); | |||
519 | ||||
520 | replaceCall(sinval); | |||
521 | return true; | |||
522 | } | |||
523 | } | |||
524 | return false; | |||
525 | } | |||
526 | ||||
527 | bool AMDGPULibCalls::useNative(CallInst *aCI) { | |||
528 | CI = aCI; | |||
529 | Function *Callee = aCI->getCalledFunction(); | |||
530 | ||||
531 | FuncInfo FInfo; | |||
532 | if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() || | |||
533 | FInfo.getPrefix() != AMDGPULibFunc::NOPFX || | |||
534 | getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || | |||
535 | !(AllNative || useNativeFunc(FInfo.getName()))) { | |||
536 | return false; | |||
537 | } | |||
538 | ||||
539 | if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS) | |||
540 | return sincosUseNative(aCI, FInfo); | |||
541 | ||||
542 | FInfo.setPrefix(AMDGPULibFunc::NATIVE); | |||
543 | FunctionCallee F = getFunction(aCI->getModule(), FInfo); | |||
544 | if (!F) | |||
545 | return false; | |||
546 | ||||
547 | aCI->setCalledFunction(F); | |||
548 | DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCIdo { } while (false) | |||
549 | << " with native version")do { } while (false); | |||
550 | return true; | |||
551 | } | |||
552 | ||||
553 | // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe | |||
554 | // builtin, with appended type size and alignment arguments, where 2 or 4 | |||
555 | // indicates the original number of arguments. The library has optimized version | |||
556 | // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same | |||
557 | // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N | |||
558 | // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., | |||
559 | // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. | |||
560 | bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, | |||
561 | FuncInfo &FInfo) { | |||
562 | auto *Callee = CI->getCalledFunction(); | |||
563 | if (!Callee->isDeclaration()) | |||
564 | return false; | |||
565 | ||||
566 | assert(Callee->hasName() && "Invalid read_pipe/write_pipe function")((void)0); | |||
567 | auto *M = Callee->getParent(); | |||
568 | auto &Ctx = M->getContext(); | |||
569 | std::string Name = std::string(Callee->getName()); | |||
570 | auto NumArg = CI->getNumArgOperands(); | |||
571 | if (NumArg != 4 && NumArg != 6) | |||
572 | return false; | |||
573 | auto *PacketSize = CI->getArgOperand(NumArg - 2); | |||
574 | auto *PacketAlign = CI->getArgOperand(NumArg - 1); | |||
575 | if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign)) | |||
576 | return false; | |||
577 | unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue(); | |||
578 | Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue(); | |||
579 | if (Alignment != Size) | |||
580 | return false; | |||
581 | ||||
582 | Type *PtrElemTy; | |||
583 | if (Size <= 8) | |||
584 | PtrElemTy = Type::getIntNTy(Ctx, Size * 8); | |||
585 | else | |||
586 | PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8); | |||
587 | unsigned PtrArgLoc = CI->getNumArgOperands() - 3; | |||
588 | auto PtrArg = CI->getArgOperand(PtrArgLoc); | |||
589 | unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace(); | |||
590 | auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS); | |||
591 | ||||
592 | SmallVector<llvm::Type *, 6> ArgTys; | |||
593 | for (unsigned I = 0; I != PtrArgLoc; ++I) | |||
594 | ArgTys.push_back(CI->getArgOperand(I)->getType()); | |||
595 | ArgTys.push_back(PtrTy); | |||
596 | ||||
597 | Name = Name + "_" + std::to_string(Size); | |||
598 | auto *FTy = FunctionType::get(Callee->getReturnType(), | |||
599 | ArrayRef<Type *>(ArgTys), false); | |||
600 | AMDGPULibFunc NewLibFunc(Name, FTy); | |||
601 | FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); | |||
602 | if (!F) | |||
603 | return false; | |||
604 | ||||
605 | auto *BCast = B.CreatePointerCast(PtrArg, PtrTy); | |||
606 | SmallVector<Value *, 6> Args; | |||
607 | for (unsigned I = 0; I != PtrArgLoc; ++I) | |||
608 | Args.push_back(CI->getArgOperand(I)); | |||
609 | Args.push_back(BCast); | |||
610 | ||||
611 | auto *NCI = B.CreateCall(F, Args); | |||
612 | NCI->setAttributes(CI->getAttributes()); | |||
613 | CI->replaceAllUsesWith(NCI); | |||
614 | CI->dropAllReferences(); | |||
615 | CI->eraseFromParent(); | |||
616 | ||||
617 | return true; | |||
618 | } | |||
619 | ||||
620 | // This function returns false if no change; return true otherwise. | |||
621 | bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { | |||
622 | this->CI = CI; | |||
623 | Function *Callee = CI->getCalledFunction(); | |||
624 | ||||
625 | // Ignore indirect calls. | |||
626 | if (Callee
| |||
627 | ||||
628 | BasicBlock *BB = CI->getParent(); | |||
629 | LLVMContext &Context = CI->getParent()->getContext(); | |||
630 | IRBuilder<> B(Context); | |||
631 | ||||
632 | // Set the builder to the instruction after the call. | |||
633 | B.SetInsertPoint(BB, CI->getIterator()); | |||
634 | ||||
635 | // Copy fast flags from the original call. | |||
636 | if (const FPMathOperator *FPOp
| |||
637 | B.setFastMathFlags(FPOp->getFastMathFlags()); | |||
638 | ||||
639 | switch (Callee->getIntrinsicID()) { | |||
640 | default: | |||
641 | break; | |||
642 | case Intrinsic::amdgcn_wavefrontsize: | |||
643 | return !EnablePreLink && fold_wavefrontsize(CI, B); | |||
644 | } | |||
645 | ||||
646 | FuncInfo FInfo; | |||
647 | if (!parseFunctionName(Callee->getName(), &FInfo)) | |||
648 | return false; | |||
649 | ||||
650 | // Further check the number of arguments to see if they match. | |||
651 | if (CI->getNumArgOperands() != FInfo.getNumArgs()) | |||
652 | return false; | |||
653 | ||||
654 | if (TDOFold(CI, FInfo)) | |||
655 | return true; | |||
656 | ||||
657 | // Under unsafe-math, evaluate calls if possible. | |||
658 | // According to Brian Sumner, we can do this for all f32 function calls | |||
659 | // using host's double function calls. | |||
660 | if (isUnsafeMath(CI) && evaluateCall(CI, FInfo)) | |||
661 | return true; | |||
662 | ||||
663 | // Specilized optimizations for each function call | |||
664 | switch (FInfo.getId()) { | |||
665 | case AMDGPULibFunc::EI_RECIP: | |||
666 | // skip vector function | |||
667 | assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||((void)0) | |||
668 | FInfo.getPrefix() == AMDGPULibFunc::HALF) &&((void)0) | |||
669 | "recip must be an either native or half function")((void)0); | |||
670 | return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo); | |||
671 | ||||
672 | case AMDGPULibFunc::EI_DIVIDE: | |||
673 | // skip vector function | |||
674 | assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||((void)0) | |||
675 | FInfo.getPrefix() == AMDGPULibFunc::HALF) &&((void)0) | |||
676 | "divide must be an either native or half function")((void)0); | |||
677 | return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo); | |||
678 | ||||
679 | case AMDGPULibFunc::EI_POW: | |||
680 | case AMDGPULibFunc::EI_POWR: | |||
681 | case AMDGPULibFunc::EI_POWN: | |||
682 | return fold_pow(CI, B, FInfo); | |||
683 | ||||
684 | case AMDGPULibFunc::EI_ROOTN: | |||
685 | // skip vector function | |||
686 | return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo); | |||
687 | ||||
688 | case AMDGPULibFunc::EI_FMA: | |||
689 | case AMDGPULibFunc::EI_MAD: | |||
690 | case AMDGPULibFunc::EI_NFMA: | |||
691 | // skip vector function | |||
692 | return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo); | |||
693 | ||||
694 | case AMDGPULibFunc::EI_SQRT: | |||
695 | return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo); | |||
696 | case AMDGPULibFunc::EI_COS: | |||
697 | case AMDGPULibFunc::EI_SIN: | |||
698 | if ((getArgType(FInfo) == AMDGPULibFunc::F32 || | |||
699 | getArgType(FInfo) == AMDGPULibFunc::F64) | |||
700 | && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX)) | |||
701 | return fold_sincos(CI, B, AA); | |||
702 | ||||
703 | break; | |||
704 | case AMDGPULibFunc::EI_READ_PIPE_2: | |||
705 | case AMDGPULibFunc::EI_READ_PIPE_4: | |||
706 | case AMDGPULibFunc::EI_WRITE_PIPE_2: | |||
707 | case AMDGPULibFunc::EI_WRITE_PIPE_4: | |||
708 | return fold_read_write_pipe(CI, B, FInfo); | |||
709 | ||||
710 | default: | |||
711 | break; | |||
712 | } | |||
713 | ||||
714 | return false; | |||
715 | } | |||
716 | ||||
717 | bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { | |||
718 | // Table-Driven optimization | |||
719 | const TableRef tr = getOptTable(FInfo.getId()); | |||
720 | if (tr.size
| |||
721 | return false; | |||
722 | ||||
723 | int const sz = (int)tr.size; | |||
724 | const TableEntry * const ftbl = tr.table; | |||
725 | Value *opr0 = CI->getArgOperand(0); | |||
726 | ||||
727 | if (getVecSize(FInfo) > 1) { | |||
728 | if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) { | |||
729 | SmallVector<double, 0> DVal; | |||
730 | for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) { | |||
731 | ConstantFP *eltval = dyn_cast<ConstantFP>( | |||
732 | CV->getElementAsConstant((unsigned)eltNo)); | |||
733 | assert(eltval && "Non-FP arguments in math function!")((void)0); | |||
734 | bool found = false; | |||
735 | for (int i=0; i < sz; ++i) { | |||
736 | if (eltval->isExactlyValue(ftbl[i].input)) { | |||
| ||||
737 | DVal.push_back(ftbl[i].result); | |||
738 | found = true; | |||
739 | break; | |||
740 | } | |||
741 | } | |||
742 | if (!found) { | |||
743 | // This vector constants not handled yet. | |||
744 | return false; | |||
745 | } | |||
746 | } | |||
747 | LLVMContext &context = CI->getParent()->getParent()->getContext(); | |||
748 | Constant *nval; | |||
749 | if (getArgType(FInfo) == AMDGPULibFunc::F32) { | |||
750 | SmallVector<float, 0> FVal; | |||
751 | for (unsigned i = 0; i < DVal.size(); ++i) { | |||
752 | FVal.push_back((float)DVal[i]); | |||
753 | } | |||
754 | ArrayRef<float> tmp(FVal); | |||
755 | nval = ConstantDataVector::get(context, tmp); | |||
756 | } else { // F64 | |||
757 | ArrayRef<double> tmp(DVal); | |||
758 | nval = ConstantDataVector::get(context, tmp); | |||
759 | } | |||
760 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { } while (false); | |||
761 | replaceCall(nval); | |||
762 | return true; | |||
763 | } | |||
764 | } else { | |||
765 | // Scalar version | |||
766 | if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { | |||
767 | for (int i = 0; i < sz; ++i) { | |||
768 | if (CF->isExactlyValue(ftbl[i].input)) { | |||
769 | Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result); | |||
770 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { } while (false); | |||
771 | replaceCall(nval); | |||
772 | return true; | |||
773 | } | |||
774 | } | |||
775 | } | |||
776 | } | |||
777 | ||||
778 | return false; | |||
779 | } | |||
780 | ||||
781 | bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) { | |||
782 | Module *M = CI->getModule(); | |||
783 | if (getArgType(FInfo) != AMDGPULibFunc::F32 || | |||
784 | FInfo.getPrefix() != AMDGPULibFunc::NOPFX || | |||
785 | !HasNative(FInfo.getId())) | |||
786 | return false; | |||
787 | ||||
788 | AMDGPULibFunc nf = FInfo; | |||
789 | nf.setPrefix(AMDGPULibFunc::NATIVE); | |||
790 | if (FunctionCallee FPExpr = getFunction(M, nf)) { | |||
791 | LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ")do { } while (false); | |||
792 | ||||
793 | CI->setCalledFunction(FPExpr); | |||
794 | ||||
795 | LLVM_DEBUG(dbgs() << *CI << '\n')do { } while (false); | |||
796 | ||||
797 | return true; | |||
798 | } | |||
799 | return false; | |||
800 | } | |||
801 | ||||
802 | // [native_]half_recip(c) ==> 1.0/c | |||
803 | bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B, | |||
804 | const FuncInfo &FInfo) { | |||
805 | Value *opr0 = CI->getArgOperand(0); | |||
806 | if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { | |||
807 | // Just create a normal div. Later, InstCombine will be able | |||
808 | // to compute the divide into a constant (avoid check float infinity | |||
809 | // or subnormal at this point). | |||
810 | Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0), | |||
811 | opr0, | |||
812 | "recip2div"); | |||
813 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { } while (false); | |||
814 | replaceCall(nval); | |||
815 | return true; | |||
816 | } | |||
817 | return false; | |||
818 | } | |||
819 | ||||
820 | // [native_]half_divide(x, c) ==> x/c | |||
821 | bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B, | |||
822 | const FuncInfo &FInfo) { | |||
823 | Value *opr0 = CI->getArgOperand(0); | |||
824 | Value *opr1 = CI->getArgOperand(1); | |||
825 | ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0); | |||
826 | ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); | |||
827 | ||||
828 | if ((CF0 && CF1) || // both are constants | |||
829 | (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32))) | |||
830 | // CF1 is constant && f32 divide | |||
831 | { | |||
832 | Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0), | |||
833 | opr1, "__div2recip"); | |||
834 | Value *nval = B.CreateFMul(opr0, nval1, "__div2mul"); | |||
835 | replaceCall(nval); | |||
836 | return true; | |||
837 | } | |||
838 | return false; | |||
839 | } | |||
840 | ||||
namespace llvm {
/// Base-2 logarithm of \p V.
///
/// std::log2 is guaranteed by <cmath> since C++11, so no feature-test macro
/// guard or log(V)/ln2 fallback is needed.
static double log2(double V) {
  return std::log2(V);
}
}
850 | ||||
851 | bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, | |||
852 | const FuncInfo &FInfo) { | |||
853 | assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||((void)0) | |||
854 | FInfo.getId() == AMDGPULibFunc::EI_POWR ||((void)0) | |||
855 | FInfo.getId() == AMDGPULibFunc::EI_POWN) &&((void)0) | |||
856 | "fold_pow: encounter a wrong function call")((void)0); | |||
857 | ||||
858 | Value *opr0, *opr1; | |||
859 | ConstantFP *CF; | |||
860 | ConstantInt *CINT; | |||
861 | ConstantAggregateZero *CZero; | |||
862 | Type *eltType; | |||
863 | ||||
864 | opr0 = CI->getArgOperand(0); | |||
865 | opr1 = CI->getArgOperand(1); | |||
866 | CZero = dyn_cast<ConstantAggregateZero>(opr1); | |||
867 | if (getVecSize(FInfo) == 1) { | |||
868 | eltType = opr0->getType(); | |||
869 | CF = dyn_cast<ConstantFP>(opr1); | |||
870 | CINT = dyn_cast<ConstantInt>(opr1); | |||
871 | } else { | |||
872 | VectorType *VTy = dyn_cast<VectorType>(opr0->getType()); | |||
873 | assert(VTy && "Oprand of vector function should be of vectortype")((void)0); | |||
874 | eltType = VTy->getElementType(); | |||
875 | ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1); | |||
876 | ||||
877 | // Now, only Handle vector const whose elements have the same value. | |||
878 | CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr; | |||
879 | CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr; | |||
880 | } | |||
881 | ||||
882 | // No unsafe math , no constant argument, do nothing | |||
883 | if (!isUnsafeMath(CI) && !CF && !CINT && !CZero) | |||
884 | return false; | |||
885 | ||||
886 | // 0x1111111 means that we don't do anything for this call. | |||
887 | int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111); | |||
888 | ||||
889 | if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) { | |||
890 | // pow/powr/pown(x, 0) == 1 | |||
891 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n")do { } while (false); | |||
892 | Constant *cnval = ConstantFP::get(eltType, 1.0); | |||
893 | if (getVecSize(FInfo) > 1) { | |||
894 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
895 | } | |||
896 | replaceCall(cnval); | |||
897 | return true; | |||
898 | } | |||
899 | if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { | |||
900 | // pow/powr/pown(x, 1.0) = x | |||
901 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n")do { } while (false); | |||
902 | replaceCall(opr0); | |||
903 | return true; | |||
904 | } | |||
905 | if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) { | |||
906 | // pow/powr/pown(x, 2.0) = x*x | |||
907 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0do { } while (false) | |||
908 | << "\n")do { } while (false); | |||
909 | Value *nval = B.CreateFMul(opr0, opr0, "__pow2"); | |||
910 | replaceCall(nval); | |||
911 | return true; | |||
912 | } | |||
913 | if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { | |||
914 | // pow/powr/pown(x, -1.0) = 1.0/x | |||
915 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n")do { } while (false); | |||
916 | Constant *cnval = ConstantFP::get(eltType, 1.0); | |||
917 | if (getVecSize(FInfo) > 1) { | |||
918 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
919 | } | |||
920 | Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip"); | |||
921 | replaceCall(nval); | |||
922 | return true; | |||
923 | } | |||
924 | ||||
925 | Module *M = CI->getModule(); | |||
926 | if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { | |||
927 | // pow[r](x, [-]0.5) = sqrt(x) | |||
928 | bool issqrt = CF->isExactlyValue(0.5); | |||
929 | if (FunctionCallee FPExpr = | |||
930 | getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT | |||
931 | : AMDGPULibFunc::EI_RSQRT, | |||
932 | FInfo))) { | |||
933 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { } while (false) | |||
934 | << FInfo.getName().c_str() << "(" << *opr0 << ")\n")do { } while (false); | |||
935 | Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" | |||
936 | : "__pow2rsqrt"); | |||
937 | replaceCall(nval); | |||
938 | return true; | |||
939 | } | |||
940 | } | |||
941 | ||||
942 | if (!isUnsafeMath(CI)) | |||
943 | return false; | |||
944 | ||||
945 | // Unsafe Math optimization | |||
946 | ||||
947 | // Remember that ci_opr1 is set if opr1 is integral | |||
948 | if (CF) { | |||
949 | double dval = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
950 | ? (double)CF->getValueAPF().convertToFloat() | |||
951 | : CF->getValueAPF().convertToDouble(); | |||
952 | int ival = (int)dval; | |||
953 | if ((double)ival == dval) { | |||
954 | ci_opr1 = ival; | |||
955 | } else | |||
956 | ci_opr1 = 0x11111111; | |||
957 | } | |||
958 | ||||
959 | // pow/powr/pown(x, c) = [1/](x*x*..x); where | |||
960 | // trunc(c) == c && the number of x == c && |c| <= 12 | |||
961 | unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1; | |||
962 | if (abs_opr1 <= 12) { | |||
963 | Constant *cnval; | |||
964 | Value *nval; | |||
965 | if (abs_opr1 == 0) { | |||
966 | cnval = ConstantFP::get(eltType, 1.0); | |||
967 | if (getVecSize(FInfo) > 1) { | |||
968 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
969 | } | |||
970 | nval = cnval; | |||
971 | } else { | |||
972 | Value *valx2 = nullptr; | |||
973 | nval = nullptr; | |||
974 | while (abs_opr1 > 0) { | |||
975 | valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0; | |||
976 | if (abs_opr1 & 1) { | |||
977 | nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2; | |||
978 | } | |||
979 | abs_opr1 >>= 1; | |||
980 | } | |||
981 | } | |||
982 | ||||
983 | if (ci_opr1 < 0) { | |||
984 | cnval = ConstantFP::get(eltType, 1.0); | |||
985 | if (getVecSize(FInfo) > 1) { | |||
986 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
987 | } | |||
988 | nval = B.CreateFDiv(cnval, nval, "__1powprod"); | |||
989 | } | |||
990 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { } while (false) | |||
991 | << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0do { } while (false) | |||
992 | << ")\n")do { } while (false); | |||
993 | replaceCall(nval); | |||
994 | return true; | |||
995 | } | |||
996 | ||||
997 | // powr ---> exp2(y * log2(x)) | |||
998 | // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) | |||
999 | FunctionCallee ExpExpr = | |||
1000 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); | |||
1001 | if (!ExpExpr) | |||
1002 | return false; | |||
1003 | ||||
1004 | bool needlog = false; | |||
1005 | bool needabs = false; | |||
1006 | bool needcopysign = false; | |||
1007 | Constant *cnval = nullptr; | |||
1008 | if (getVecSize(FInfo) == 1) { | |||
1009 | CF = dyn_cast<ConstantFP>(opr0); | |||
1010 | ||||
1011 | if (CF) { | |||
1012 | double V = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
1013 | ? (double)CF->getValueAPF().convertToFloat() | |||
1014 | : CF->getValueAPF().convertToDouble(); | |||
1015 | ||||
1016 | V = log2(std::abs(V)); | |||
1017 | cnval = ConstantFP::get(eltType, V); | |||
1018 | needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) && | |||
1019 | CF->isNegative(); | |||
1020 | } else { | |||
1021 | needlog = true; | |||
1022 | needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR && | |||
1023 | (!CF || CF->isNegative()); | |||
1024 | } | |||
1025 | } else { | |||
1026 | ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0); | |||
1027 | ||||
1028 | if (!CDV) { | |||
1029 | needlog = true; | |||
1030 | needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; | |||
1031 | } else { | |||
1032 | assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&((void)0) | |||
1033 | "Wrong vector size detected")((void)0); | |||
1034 | ||||
1035 | SmallVector<double, 0> DVal; | |||
1036 | for (int i=0; i < getVecSize(FInfo); ++i) { | |||
1037 | double V = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
1038 | ? (double)CDV->getElementAsFloat(i) | |||
1039 | : CDV->getElementAsDouble(i); | |||
1040 | if (V < 0.0) needcopysign = true; | |||
1041 | V = log2(std::abs(V)); | |||
1042 | DVal.push_back(V); | |||
1043 | } | |||
1044 | if (getArgType(FInfo) == AMDGPULibFunc::F32) { | |||
1045 | SmallVector<float, 0> FVal; | |||
1046 | for (unsigned i=0; i < DVal.size(); ++i) { | |||
1047 | FVal.push_back((float)DVal[i]); | |||
1048 | } | |||
1049 | ArrayRef<float> tmp(FVal); | |||
1050 | cnval = ConstantDataVector::get(M->getContext(), tmp); | |||
1051 | } else { | |||
1052 | ArrayRef<double> tmp(DVal); | |||
1053 | cnval = ConstantDataVector::get(M->getContext(), tmp); | |||
1054 | } | |||
1055 | } | |||
1056 | } | |||
1057 | ||||
1058 | if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) { | |||
1059 | // We cannot handle corner cases for a general pow() function, give up | |||
1060 | // unless y is a constant integral value. Then proceed as if it were pown. | |||
1061 | if (getVecSize(FInfo) == 1) { | |||
1062 | if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) { | |||
1063 | double y = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
1064 | ? (double)CF->getValueAPF().convertToFloat() | |||
1065 | : CF->getValueAPF().convertToDouble(); | |||
1066 | if (y != (double)(int64_t)y) | |||
1067 | return false; | |||
1068 | } else | |||
1069 | return false; | |||
1070 | } else { | |||
1071 | if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) { | |||
1072 | for (int i=0; i < getVecSize(FInfo); ++i) { | |||
1073 | double y = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
1074 | ? (double)CDV->getElementAsFloat(i) | |||
1075 | : CDV->getElementAsDouble(i); | |||
1076 | if (y != (double)(int64_t)y) | |||
1077 | return false; | |||
1078 | } | |||
1079 | } else | |||
1080 | return false; | |||
1081 | } | |||
1082 | } | |||
1083 | ||||
1084 | Value *nval; | |||
1085 | if (needabs) { | |||
1086 | FunctionCallee AbsExpr = | |||
1087 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo)); | |||
1088 | if (!AbsExpr) | |||
1089 | return false; | |||
1090 | nval = CreateCallEx(B, AbsExpr, opr0, "__fabs"); | |||
1091 | } else { | |||
1092 | nval = cnval ? cnval : opr0; | |||
1093 | } | |||
1094 | if (needlog) { | |||
1095 | FunctionCallee LogExpr = | |||
1096 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); | |||
1097 | if (!LogExpr) | |||
1098 | return false; | |||
1099 | nval = CreateCallEx(B,LogExpr, nval, "__log2"); | |||
1100 | } | |||
1101 | ||||
1102 | if (FInfo.getId() == AMDGPULibFunc::EI_POWN) { | |||
1103 | // convert int(32) to fp(f32 or f64) | |||
1104 | opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F"); | |||
1105 | } | |||
1106 | nval = B.CreateFMul(opr1, nval, "__ylogx"); | |||
1107 | nval = CreateCallEx(B,ExpExpr, nval, "__exp2"); | |||
1108 | ||||
1109 | if (needcopysign) { | |||
1110 | Value *opr_n; | |||
1111 | Type* rTy = opr0->getType(); | |||
1112 | Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty(); | |||
1113 | Type *nTy = nTyS; | |||
1114 | if (const auto *vTy = dyn_cast<FixedVectorType>(rTy)) | |||
1115 | nTy = FixedVectorType::get(nTyS, vTy); | |||
1116 | unsigned size = nTy->getScalarSizeInBits(); | |||
1117 | opr_n = CI->getArgOperand(1); | |||
1118 | if (opr_n->getType()->isIntegerTy()) | |||
1119 | opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou"); | |||
1120 | else | |||
1121 | opr_n = B.CreateFPToSI(opr1, nTy, "__ytou"); | |||
1122 | ||||
1123 | Value *sign = B.CreateShl(opr_n, size-1, "__yeven"); | |||
1124 | sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign"); | |||
1125 | nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign); | |||
1126 | nval = B.CreateBitCast(nval, opr0->getType()); | |||
1127 | } | |||
1128 | ||||
1129 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { } while (false) | |||
1130 | << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n")do { } while (false); | |||
1131 | replaceCall(nval); | |||
1132 | ||||
1133 | return true; | |||
1134 | } | |||
1135 | ||||
1136 | bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, | |||
1137 | const FuncInfo &FInfo) { | |||
1138 | Value *opr0 = CI->getArgOperand(0); | |||
1139 | Value *opr1 = CI->getArgOperand(1); | |||
1140 | ||||
1141 | ConstantInt *CINT = dyn_cast<ConstantInt>(opr1); | |||
1142 | if (!CINT) { | |||
1143 | return false; | |||
1144 | } | |||
1145 | int ci_opr1 = (int)CINT->getSExtValue(); | |||
1146 | if (ci_opr1 == 1) { // rootn(x, 1) = x | |||
1147 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n")do { } while (false); | |||
1148 | replaceCall(opr0); | |||
1149 | return true; | |||
1150 | } | |||
1151 | if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x) | |||
1152 | Module *M = CI->getModule(); | |||
1153 | if (FunctionCallee FPExpr = | |||
1154 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { | |||
1155 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n")do { } while (false); | |||
1156 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); | |||
1157 | replaceCall(nval); | |||
1158 | return true; | |||
1159 | } | |||
1160 | } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) | |||
1161 | Module *M = CI->getModule(); | |||
1162 | if (FunctionCallee FPExpr = | |||
1163 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { | |||
1164 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n")do { } while (false); | |||
1165 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); | |||
1166 | replaceCall(nval); | |||
1167 | return true; | |||
1168 | } | |||
1169 | } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x | |||
1170 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n")do { } while (false); | |||
1171 | Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), | |||
1172 | opr0, | |||
1173 | "__rootn2div"); | |||
1174 | replaceCall(nval); | |||
1175 | return true; | |||
1176 | } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x) | |||
1177 | Module *M = CI->getModule(); | |||
1178 | if (FunctionCallee FPExpr = | |||
1179 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { | |||
1180 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0do { } while (false) | |||
1181 | << ")\n")do { } while (false); | |||
1182 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); | |||
1183 | replaceCall(nval); | |||
1184 | return true; | |||
1185 | } | |||
1186 | } | |||
1187 | return false; | |||
1188 | } | |||
1189 | ||||
1190 | bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, | |||
1191 | const FuncInfo &FInfo) { | |||
1192 | Value *opr0 = CI->getArgOperand(0); | |||
1193 | Value *opr1 = CI->getArgOperand(1); | |||
1194 | Value *opr2 = CI->getArgOperand(2); | |||
1195 | ||||
1196 | ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0); | |||
1197 | ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); | |||
1198 | if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) { | |||
1199 | // fma/mad(a, b, c) = c if a=0 || b=0 | |||
1200 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n")do { } while (false); | |||
1201 | replaceCall(opr2); | |||
1202 | return true; | |||
1203 | } | |||
1204 | if (CF0 && CF0->isExactlyValue(1.0f)) { | |||
1205 | // fma/mad(a, b, c) = b+c if a=1 | |||
1206 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2do { } while (false) | |||
1207 | << "\n")do { } while (false); | |||
1208 | Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd"); | |||
1209 | replaceCall(nval); | |||
1210 | return true; | |||
1211 | } | |||
1212 | if (CF1 && CF1->isExactlyValue(1.0f)) { | |||
1213 | // fma/mad(a, b, c) = a+c if b=1 | |||
1214 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2do { } while (false) | |||
1215 | << "\n")do { } while (false); | |||
1216 | Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd"); | |||
1217 | replaceCall(nval); | |||
1218 | return true; | |||
1219 | } | |||
1220 | if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) { | |||
1221 | if (CF->isZero()) { | |||
1222 | // fma/mad(a, b, c) = a*b if c=0 | |||
1223 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "do { } while (false) | |||
1224 | << *opr1 << "\n")do { } while (false); | |||
1225 | Value *nval = B.CreateFMul(opr0, opr1, "fmamul"); | |||
1226 | replaceCall(nval); | |||
1227 | return true; | |||
1228 | } | |||
1229 | } | |||
1230 | ||||
1231 | return false; | |||
1232 | } | |||
1233 | ||||
1234 | // Get a scalar native builtin signle argument FP function | |||
1235 | FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, | |||
1236 | const FuncInfo &FInfo) { | |||
1237 | if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) | |||
1238 | return nullptr; | |||
1239 | FuncInfo nf = FInfo; | |||
1240 | nf.setPrefix(AMDGPULibFunc::NATIVE); | |||
1241 | return getFunction(M, nf); | |||
1242 | } | |||
1243 | ||||
1244 | // fold sqrt -> native_sqrt (x) | |||
1245 | bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B, | |||
1246 | const FuncInfo &FInfo) { | |||
1247 | if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && | |||
1248 | (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { | |||
1249 | if (FunctionCallee FPExpr = getNativeFunction( | |||
1250 | CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { | |||
1251 | Value *opr0 = CI->getArgOperand(0); | |||
1252 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { } while (false) | |||
1253 | << "sqrt(" << *opr0 << ")\n")do { } while (false); | |||
1254 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt"); | |||
1255 | replaceCall(nval); | |||
1256 | return true; | |||
1257 | } | |||
1258 | } | |||
1259 | return false; | |||
1260 | } | |||
1261 | ||||
1262 | // fold sin, cos -> sincos. | |||
1263 | bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, | |||
1264 | AliasAnalysis *AA) { | |||
1265 | AMDGPULibFunc fInfo; | |||
1266 | if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo)) | |||
1267 | return false; | |||
1268 | ||||
1269 | assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||((void)0) | |||
1270 | fInfo.getId() == AMDGPULibFunc::EI_COS)((void)0); | |||
1271 | bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; | |||
1272 | ||||
1273 | Value *CArgVal = CI->getArgOperand(0); | |||
1274 | BasicBlock * const CBB = CI->getParent(); | |||
1275 | ||||
1276 | int const MaxScan = 30; | |||
1277 | bool Changed = false; | |||
1278 | ||||
1279 | { // fold in load value. | |||
1280 | LoadInst *LI = dyn_cast<LoadInst>(CArgVal); | |||
1281 | if (LI && LI->getParent() == CBB) { | |||
1282 | BasicBlock::iterator BBI = LI->getIterator(); | |||
1283 | Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA); | |||
1284 | if (AvailableVal) { | |||
1285 | Changed = true; | |||
1286 | CArgVal->replaceAllUsesWith(AvailableVal); | |||
1287 | if (CArgVal->getNumUses() == 0) | |||
1288 | LI->eraseFromParent(); | |||
1289 | CArgVal = CI->getArgOperand(0); | |||
1290 | } | |||
1291 | } | |||
1292 | } | |||
1293 | ||||
1294 | Module *M = CI->getModule(); | |||
1295 | fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN); | |||
1296 | std::string const PairName = fInfo.mangle(); | |||
1297 | ||||
1298 | CallInst *UI = nullptr; | |||
1299 | for (User* U : CArgVal->users()) { | |||
1300 | CallInst *XI = dyn_cast_or_null<CallInst>(U); | |||
1301 | if (!XI || XI == CI || XI->getParent() != CBB) | |||
1302 | continue; | |||
1303 | ||||
1304 | Function *UCallee = XI->getCalledFunction(); | |||
1305 | if (!UCallee || !UCallee->getName().equals(PairName)) | |||
1306 | continue; | |||
1307 | ||||
1308 | BasicBlock::iterator BBI = CI->getIterator(); | |||
1309 | if (BBI == CI->getParent()->begin()) | |||
1310 | break; | |||
1311 | --BBI; | |||
1312 | for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) { | |||
1313 | if (cast<Instruction>(BBI) == XI) { | |||
1314 | UI = XI; | |||
1315 | break; | |||
1316 | } | |||
1317 | } | |||
1318 | if (UI) break; | |||
1319 | } | |||
1320 | ||||
1321 | if (!UI) | |||
1322 | return Changed; | |||
1323 | ||||
1324 | // Merge the sin and cos. | |||
1325 | ||||
1326 | // for OpenCL 2.0 we have only generic implementation of sincos | |||
1327 | // function. | |||
1328 | AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); | |||
1329 | nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); | |||
1330 | FunctionCallee Fsincos = getFunction(M, nf); | |||
1331 | if (!Fsincos) | |||
1332 | return Changed; | |||
1333 | ||||
1334 | BasicBlock::iterator ItOld = B.GetInsertPoint(); | |||
1335 | AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_"); | |||
1336 | B.SetInsertPoint(UI); | |||
1337 | ||||
1338 | Value *P = Alloc; | |||
1339 | Type *PTy = Fsincos.getFunctionType()->getParamType(1); | |||
1340 | // The allocaInst allocates the memory in private address space. This need | |||
1341 | // to be bitcasted to point to the address space of cos pointer type. | |||
1342 | // In OpenCL 2.0 this is generic, while in 1.2 that is private. | |||
1343 | if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) | |||
1344 | P = B.CreateAddrSpaceCast(Alloc, PTy); | |||
1345 | CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P); | |||
1346 | ||||
1347 | LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "do { } while (false) | |||
1348 | << *Call << "\n")do { } while (false); | |||
1349 | ||||
1350 | if (!isSin) { // CI->cos, UI->sin | |||
1351 | B.SetInsertPoint(&*ItOld); | |||
1352 | UI->replaceAllUsesWith(&*Call); | |||
1353 | Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); | |||
1354 | CI->replaceAllUsesWith(Reload); | |||
1355 | UI->eraseFromParent(); | |||
1356 | CI->eraseFromParent(); | |||
1357 | } else { // CI->sin, UI->cos | |||
1358 | Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); | |||
1359 | UI->replaceAllUsesWith(Reload); | |||
1360 | CI->replaceAllUsesWith(Call); | |||
1361 | UI->eraseFromParent(); | |||
1362 | CI->eraseFromParent(); | |||
1363 | } | |||
1364 | return true; | |||
1365 | } | |||
1366 | ||||
1367 | bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) { | |||
1368 | if (!TM) | |||
1369 | return false; | |||
1370 | ||||
1371 | StringRef CPU = TM->getTargetCPU(); | |||
1372 | StringRef Features = TM->getTargetFeatureString(); | |||
1373 | if ((CPU.empty() || CPU.equals_insensitive("generic")) && | |||
1374 | (Features.empty() || | |||
1375 | Features.find_insensitive("wavefrontsize") == StringRef::npos)) | |||
1376 | return false; | |||
1377 | ||||
1378 | Function *F = CI->getParent()->getParent(); | |||
1379 | const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F); | |||
1380 | unsigned N = ST.getWavefrontSize(); | |||
1381 | ||||
1382 | LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "do { } while (false) | |||
1383 | << N << "\n")do { } while (false); | |||
1384 | ||||
1385 | CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N)); | |||
1386 | CI->eraseFromParent(); | |||
1387 | return true; | |||
1388 | } | |||
1389 | ||||
1390 | // Get insertion point at entry. | |||
1391 | BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) { | |||
1392 | Function * Func = UI->getParent()->getParent(); | |||
1393 | BasicBlock * BB = &Func->getEntryBlock(); | |||
1394 | assert(BB && "Entry block not found!")((void)0); | |||
1395 | BasicBlock::iterator ItNew = BB->begin(); | |||
1396 | return ItNew; | |||
1397 | } | |||
1398 | ||||
1399 | // Insert a AllocsInst at the beginning of function entry block. | |||
1400 | AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B, | |||
1401 | const char *prefix) { | |||
1402 | BasicBlock::iterator ItNew = getEntryIns(UI); | |||
1403 | Function *UCallee = UI->getCalledFunction(); | |||
1404 | Type *RetType = UCallee->getReturnType(); | |||
1405 | B.SetInsertPoint(&*ItNew); | |||
1406 | AllocaInst *Alloc = B.CreateAlloca(RetType, 0, | |||
1407 | std::string(prefix) + UI->getName()); | |||
1408 | Alloc->setAlignment( | |||
1409 | Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType))); | |||
1410 | return Alloc; | |||
1411 | } | |||
1412 | ||||
1413 | bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo, | |||
1414 | double& Res0, double& Res1, | |||
1415 | Constant *copr0, Constant *copr1, | |||
1416 | Constant *copr2) { | |||
1417 | // By default, opr0/opr1/opr3 holds values of float/double type. | |||
1418 | // If they are not float/double, each function has to its | |||
1419 | // operand separately. | |||
1420 | double opr0=0.0, opr1=0.0, opr2=0.0; | |||
1421 | ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0); | |||
1422 | ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1); | |||
1423 | ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2); | |||
1424 | if (fpopr0) { | |||
1425 | opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64) | |||
1426 | ? fpopr0->getValueAPF().convertToDouble() | |||
1427 | : (double)fpopr0->getValueAPF().convertToFloat(); | |||
1428 | } | |||
1429 | ||||
1430 | if (fpopr1) { | |||
1431 | opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64) | |||
1432 | ? fpopr1->getValueAPF().convertToDouble() | |||
1433 | : (double)fpopr1->getValueAPF().convertToFloat(); | |||
1434 | } | |||
1435 | ||||
1436 | if (fpopr2) { | |||
1437 | opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64) | |||
1438 | ? fpopr2->getValueAPF().convertToDouble() | |||
1439 | : (double)fpopr2->getValueAPF().convertToFloat(); | |||
1440 | } | |||
1441 | ||||
1442 | switch (FInfo.getId()) { | |||
1443 | default : return false; | |||
1444 | ||||
1445 | case AMDGPULibFunc::EI_ACOS: | |||
1446 | Res0 = acos(opr0); | |||
1447 | return true; | |||
1448 | ||||
1449 | case AMDGPULibFunc::EI_ACOSH: | |||
1450 | // acosh(x) == log(x + sqrt(x*x - 1)) | |||
1451 | Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0)); | |||
1452 | return true; | |||
1453 | ||||
1454 | case AMDGPULibFunc::EI_ACOSPI: | |||
1455 | Res0 = acos(opr0) / MATH_PInumbers::pi; | |||
1456 | return true; | |||
1457 | ||||
1458 | case AMDGPULibFunc::EI_ASIN: | |||
1459 | Res0 = asin(opr0); | |||
1460 | return true; | |||
1461 | ||||
1462 | case AMDGPULibFunc::EI_ASINH: | |||
1463 | // asinh(x) == log(x + sqrt(x*x + 1)) | |||
1464 | Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0)); | |||
1465 | return true; | |||
1466 | ||||
1467 | case AMDGPULibFunc::EI_ASINPI: | |||
1468 | Res0 = asin(opr0) / MATH_PInumbers::pi; | |||
1469 | return true; | |||
1470 | ||||
1471 | case AMDGPULibFunc::EI_ATAN: | |||
1472 | Res0 = atan(opr0); | |||
1473 | return true; | |||
1474 | ||||
1475 | case AMDGPULibFunc::EI_ATANH: | |||
1476 | // atanh(x) == (log(x+1) - log(x-1))/2; | |||
1477 | Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0; | |||
1478 | return true; | |||
1479 | ||||
1480 | case AMDGPULibFunc::EI_ATANPI: | |||
1481 | Res0 = atan(opr0) / MATH_PInumbers::pi; | |||
1482 | return true; | |||
1483 | ||||
1484 | case AMDGPULibFunc::EI_CBRT: | |||
1485 | Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0); | |||
1486 | return true; | |||
1487 | ||||
1488 | case AMDGPULibFunc::EI_COS: | |||
1489 | Res0 = cos(opr0); | |||
1490 | return true; | |||
1491 | ||||
1492 | case AMDGPULibFunc::EI_COSH: | |||
1493 | Res0 = cosh(opr0); | |||
1494 | return true; | |||
1495 | ||||
1496 | case AMDGPULibFunc::EI_COSPI: | |||
1497 | Res0 = cos(MATH_PInumbers::pi * opr0); | |||
1498 | return true; | |||
1499 | ||||
1500 | case AMDGPULibFunc::EI_EXP: | |||
1501 | Res0 = exp(opr0); | |||
1502 | return true; | |||
1503 | ||||
1504 | case AMDGPULibFunc::EI_EXP2: | |||
1505 | Res0 = pow(2.0, opr0); | |||
1506 | return true; | |||
1507 | ||||
1508 | case AMDGPULibFunc::EI_EXP10: | |||
1509 | Res0 = pow(10.0, opr0); | |||
1510 | return true; | |||
1511 | ||||
1512 | case AMDGPULibFunc::EI_EXPM1: | |||
1513 | Res0 = exp(opr0) - 1.0; | |||
1514 | return true; | |||
1515 | ||||
1516 | case AMDGPULibFunc::EI_LOG: | |||
1517 | Res0 = log(opr0); | |||
1518 | return true; | |||
1519 | ||||
1520 | case AMDGPULibFunc::EI_LOG2: | |||
1521 | Res0 = log(opr0) / log(2.0); | |||
1522 | return true; | |||
1523 | ||||
1524 | case AMDGPULibFunc::EI_LOG10: | |||
1525 | Res0 = log(opr0) / log(10.0); | |||
1526 | return true; | |||
1527 | ||||
1528 | case AMDGPULibFunc::EI_RSQRT: | |||
1529 | Res0 = 1.0 / sqrt(opr0); | |||
1530 | return true; | |||
1531 | ||||
1532 | case AMDGPULibFunc::EI_SIN: | |||
1533 | Res0 = sin(opr0); | |||
1534 | return true; | |||
1535 | ||||
1536 | case AMDGPULibFunc::EI_SINH: | |||
1537 | Res0 = sinh(opr0); | |||
1538 | return true; | |||
1539 | ||||
1540 | case AMDGPULibFunc::EI_SINPI: | |||
1541 | Res0 = sin(MATH_PInumbers::pi * opr0); | |||
1542 | return true; | |||
1543 | ||||
1544 | case AMDGPULibFunc::EI_SQRT: | |||
1545 | Res0 = sqrt(opr0); | |||
1546 | return true; | |||
1547 | ||||
1548 | case AMDGPULibFunc::EI_TAN: | |||
1549 | Res0 = tan(opr0); | |||
1550 | return true; | |||
1551 | ||||
1552 | case AMDGPULibFunc::EI_TANH: | |||
1553 | Res0 = tanh(opr0); | |||
1554 | return true; | |||
1555 | ||||
1556 | case AMDGPULibFunc::EI_TANPI: | |||
1557 | Res0 = tan(MATH_PInumbers::pi * opr0); | |||
1558 | return true; | |||
1559 | ||||
1560 | case AMDGPULibFunc::EI_RECIP: | |||
1561 | Res0 = 1.0 / opr0; | |||
1562 | return true; | |||
1563 | ||||
1564 | // two-arg functions | |||
1565 | case AMDGPULibFunc::EI_DIVIDE: | |||
1566 | Res0 = opr0 / opr1; | |||
1567 | return true; | |||
1568 | ||||
1569 | case AMDGPULibFunc::EI_POW: | |||
1570 | case AMDGPULibFunc::EI_POWR: | |||
1571 | Res0 = pow(opr0, opr1); | |||
1572 | return true; | |||
1573 | ||||
1574 | case AMDGPULibFunc::EI_POWN: { | |||
1575 | if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { | |||
1576 | double val = (double)iopr1->getSExtValue(); | |||
1577 | Res0 = pow(opr0, val); | |||
1578 | return true; | |||
1579 | } | |||
1580 | return false; | |||
1581 | } | |||
1582 | ||||
1583 | case AMDGPULibFunc::EI_ROOTN: { | |||
1584 | if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { | |||
1585 | double val = (double)iopr1->getSExtValue(); | |||
1586 | Res0 = pow(opr0, 1.0 / val); | |||
1587 | return true; | |||
1588 | } | |||
1589 | return false; | |||
1590 | } | |||
1591 | ||||
1592 | // with ptr arg | |||
1593 | case AMDGPULibFunc::EI_SINCOS: | |||
1594 | Res0 = sin(opr0); | |||
1595 | Res1 = cos(opr0); | |||
1596 | return true; | |||
1597 | ||||
1598 | // three-arg functions | |||
1599 | case AMDGPULibFunc::EI_FMA: | |||
1600 | case AMDGPULibFunc::EI_MAD: | |||
1601 | Res0 = opr0 * opr1 + opr2; | |||
1602 | return true; | |||
1603 | } | |||
1604 | ||||
1605 | return false; | |||
1606 | } | |||
1607 | ||||
1608 | bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) { | |||
1609 | int numArgs = (int)aCI->getNumArgOperands(); | |||
1610 | if (numArgs > 3) | |||
1611 | return false; | |||
1612 | ||||
1613 | Constant *copr0 = nullptr; | |||
1614 | Constant *copr1 = nullptr; | |||
1615 | Constant *copr2 = nullptr; | |||
1616 | if (numArgs > 0) { | |||
1617 | if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr) | |||
1618 | return false; | |||
1619 | } | |||
1620 | ||||
1621 | if (numArgs > 1) { | |||
1622 | if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) { | |||
1623 | if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS) | |||
1624 | return false; | |||
1625 | } | |||
1626 | } | |||
1627 | ||||
1628 | if (numArgs > 2) { | |||
1629 | if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr) | |||
1630 | return false; | |||
1631 | } | |||
1632 | ||||
1633 | // At this point, all arguments to aCI are constants. | |||
1634 | ||||
1635 | // max vector size is 16, and sincos will generate two results. | |||
1636 | double DVal0[16], DVal1[16]; | |||
1637 | bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); | |||
1638 | if (getVecSize(FInfo) == 1) { | |||
1639 | if (!evaluateScalarMathFunc(FInfo, DVal0[0], | |||
1640 | DVal1[0], copr0, copr1, copr2)) { | |||
1641 | return false; | |||
1642 | } | |||
1643 | } else { | |||
1644 | ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0); | |||
1645 | ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1); | |||
1646 | ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2); | |||
1647 | for (int i=0; i < getVecSize(FInfo); ++i) { | |||
1648 | Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; | |||
1649 | Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr; | |||
1650 | Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr; | |||
1651 | if (!evaluateScalarMathFunc(FInfo, DVal0[i], | |||
1652 | DVal1[i], celt0, celt1, celt2)) { | |||
1653 | return false; | |||
1654 | } | |||
1655 | } | |||
1656 | } | |||
1657 | ||||
1658 | LLVMContext &context = CI->getParent()->getParent()->getContext(); | |||
1659 | Constant *nval0, *nval1; | |||
1660 | if (getVecSize(FInfo) == 1) { | |||
1661 | nval0 = ConstantFP::get(CI->getType(), DVal0[0]); | |||
1662 | if (hasTwoResults) | |||
1663 | nval1 = ConstantFP::get(CI->getType(), DVal1[0]); | |||
1664 | } else { | |||
1665 | if (getArgType(FInfo) == AMDGPULibFunc::F32) { | |||
1666 | SmallVector <float, 0> FVal0, FVal1; | |||
1667 | for (int i=0; i < getVecSize(FInfo); ++i) | |||
1668 | FVal0.push_back((float)DVal0[i]); | |||
1669 | ArrayRef<float> tmp0(FVal0); | |||
1670 | nval0 = ConstantDataVector::get(context, tmp0); | |||
1671 | if (hasTwoResults) { | |||
1672 | for (int i=0; i < getVecSize(FInfo); ++i) | |||
1673 | FVal1.push_back((float)DVal1[i]); | |||
1674 | ArrayRef<float> tmp1(FVal1); | |||
1675 | nval1 = ConstantDataVector::get(context, tmp1); | |||
1676 | } | |||
1677 | } else { | |||
1678 | ArrayRef<double> tmp0(DVal0); | |||
1679 | nval0 = ConstantDataVector::get(context, tmp0); | |||
1680 | if (hasTwoResults) { | |||
1681 | ArrayRef<double> tmp1(DVal1); | |||
1682 | nval1 = ConstantDataVector::get(context, tmp1); | |||
1683 | } | |||
1684 | } | |||
1685 | } | |||
1686 | ||||
1687 | if (hasTwoResults) { | |||
1688 | // sincos | |||
1689 | assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&((void)0) | |||
1690 | "math function with ptr arg not supported yet")((void)0); | |||
1691 | new StoreInst(nval1, aCI->getArgOperand(1), aCI); | |||
1692 | } | |||
1693 | ||||
1694 | replaceCall(nval0); | |||
1695 | return true; | |||
1696 | } | |||
1697 | ||||
1698 | // Public interface to the Simplify LibCalls pass. | |||
1699 | FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) { | |||
1700 | return new AMDGPUSimplifyLibCalls(TM); | |||
1701 | } | |||
1702 | ||||
1703 | FunctionPass *llvm::createAMDGPUUseNativeCallsPass() { | |||
1704 | return new AMDGPUUseNativeCalls(); | |||
1705 | } | |||
1706 | ||||
1707 | bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { | |||
1708 | if (skipFunction(F)) | |||
1709 | return false; | |||
1710 | ||||
1711 | bool Changed = false; | |||
1712 | auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
1713 | ||||
1714 | LLVM_DEBUG(dbgs() << "AMDIC: process function ";do { } while (false) | |||
1715 | F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';)do { } while (false); | |||
1716 | ||||
1717 | for (auto &BB : F) { | |||
1718 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) { | |||
1719 | // Ignore non-calls. | |||
1720 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1721 | ++I; | |||
1722 | // Ignore intrinsics that do not become real instructions. | |||
1723 | if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd()) | |||
1724 | continue; | |||
1725 | ||||
1726 | // Ignore indirect calls. | |||
1727 | Function *Callee = CI->getCalledFunction(); | |||
1728 | if (Callee == 0) continue; | |||
1729 | ||||
1730 | LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";do { } while (false) | |||
1731 | dbgs().flush())do { } while (false); | |||
1732 | if(Simplifier.fold(CI, AA)) | |||
1733 | Changed = true; | |||
1734 | } | |||
1735 | } | |||
1736 | return Changed; | |||
1737 | } | |||
1738 | ||||
1739 | PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, | |||
1740 | FunctionAnalysisManager &AM) { | |||
1741 | AMDGPULibCalls Simplifier(&TM); | |||
1742 | Simplifier.initNativeFuncs(); | |||
1743 | ||||
1744 | bool Changed = false; | |||
1745 | auto AA = &AM.getResult<AAManager>(F); | |||
1746 | ||||
1747 | LLVM_DEBUG(dbgs() << "AMDIC: process function ";do { } while (false) | |||
| ||||
1748 | F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';)do { } while (false); | |||
1749 | ||||
1750 | for (auto &BB : F) { | |||
1751 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { | |||
1752 | // Ignore non-calls. | |||
1753 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1754 | ++I; | |||
1755 | // Ignore intrinsics that do not become real instructions. | |||
1756 | if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd()) | |||
1757 | continue; | |||
1758 | ||||
1759 | // Ignore indirect calls. | |||
1760 | Function *Callee = CI->getCalledFunction(); | |||
1761 | if (Callee
| |||
1762 | continue; | |||
1763 | ||||
1764 | LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";do { } while (false) | |||
1765 | dbgs().flush())do { } while (false); | |||
1766 | if (Simplifier.fold(CI, AA)) | |||
1767 | Changed = true; | |||
1768 | } | |||
1769 | } | |||
1770 | return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); | |||
1771 | } | |||
1772 | ||||
1773 | bool AMDGPUUseNativeCalls::runOnFunction(Function &F) { | |||
1774 | if (skipFunction(F) || UseNative.empty()) | |||
1775 | return false; | |||
1776 | ||||
1777 | bool Changed = false; | |||
1778 | for (auto &BB : F) { | |||
1779 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) { | |||
1780 | // Ignore non-calls. | |||
1781 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1782 | ++I; | |||
1783 | if (!CI) continue; | |||
1784 | ||||
1785 | // Ignore indirect calls. | |||
1786 | Function *Callee = CI->getCalledFunction(); | |||
1787 | if (Callee == 0) continue; | |||
1788 | ||||
1789 | if(Simplifier.useNative(CI)) | |||
1790 | Changed = true; | |||
1791 | } | |||
1792 | } | |||
1793 | return Changed; | |||
1794 | } | |||
1795 | ||||
1796 | PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, | |||
1797 | FunctionAnalysisManager &AM) { | |||
1798 | if (UseNative.empty()) | |||
1799 | return PreservedAnalyses::all(); | |||
1800 | ||||
1801 | AMDGPULibCalls Simplifier; | |||
1802 | Simplifier.initNativeFuncs(); | |||
1803 | ||||
1804 | bool Changed = false; | |||
1805 | for (auto &BB : F) { | |||
1806 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { | |||
1807 | // Ignore non-calls. | |||
1808 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1809 | ++I; | |||
1810 | if (!CI) | |||
1811 | continue; | |||
1812 | ||||
1813 | // Ignore indirect calls. | |||
1814 | Function *Callee = CI->getCalledFunction(); | |||
1815 | if (Callee == 0) | |||
1816 | continue; | |||
1817 | ||||
1818 | if (Simplifier.useNative(CI)) | |||
1819 | Changed = true; | |||
1820 | } | |||
1821 | } | |||
1822 | return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); | |||
1823 | } |