Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
Warning: line 874, column 15
Called C++ object pointer is null
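
The defect path (steps 1-5 in the annotated source below) reaches line 874 with VTy null: dyn_cast<VectorType> fails for a non-vector operand, and the assert on line 873 offers no protection because the invocation below passes -D NDEBUG, turning it into a no-op. A minimal sketch of the flagged pattern, assuming the LLVM headers named below; the function and variable names are illustrative, not the code under analysis:

// Minimal sketch of the reported pattern (illustrative names only).
#include "llvm/IR/DerivedTypes.h"   // llvm::VectorType
#include "llvm/IR/Value.h"          // llvm::Value
#include "llvm/Support/Casting.h"   // llvm::dyn_cast
#include <cassert>
using namespace llvm;

static Type *elementTypeOf(Value *Opr) {
  VectorType *VTy = dyn_cast<VectorType>(Opr->getType()); // nullptr if not a vector
  assert(VTy && "operand should be a vector");            // compiled out under NDEBUG
  return VTy->getElementType();                           // null dereference when VTy is nullptr
}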

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPULibCalls.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULibFunc.h"
16#include "GCNSubtarget.h"
17#include "llvm/Analysis/AliasAnalysis.h"
18#include "llvm/Analysis/Loads.h"
19#include "llvm/IR/IntrinsicsAMDGPU.h"
20#include "llvm/IR/IRBuilder.h"
21#include "llvm/InitializePasses.h"
22#include "llvm/Target/TargetMachine.h"
23
24#define DEBUG_TYPE "amdgpu-simplifylib"
25
26using namespace llvm;
27
28static cl::opt<bool> EnablePreLink("amdgpu-prelink",
29 cl::desc("Enable pre-link mode optimizations"),
30 cl::init(false),
31 cl::Hidden);
32
33static cl::list<std::string> UseNative("amdgpu-use-native",
34 cl::desc("Comma separated list of functions to replace with native, or all"),
35 cl::CommaSeparated, cl::ValueOptional,
36 cl::Hidden);
37
38#define MATH_PI numbers::pi
39#define MATH_E numbers::e
40#define MATH_SQRT2 numbers::sqrt2
41#define MATH_SQRT1_2 numbers::inv_sqrt2
42
43namespace llvm {
44
45class AMDGPULibCalls {
46private:
47
48 typedef llvm::AMDGPULibFunc FuncInfo;
49
50 const TargetMachine *TM;
51
52 // -fuse-native.
53 bool AllNative = false;
54
55 bool useNativeFunc(const StringRef F) const;
56
57 // Return a pointer (pointer expr) to the function if function definition with
58 // "FuncName" exists. It may create a new function prototype in pre-link mode.
59 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
60
61 // Replace a normal function with its native version.
62 bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
63
64 bool parseFunctionName(const StringRef& FMangledName,
65 FuncInfo *FInfo=nullptr /*out*/);
66
67 bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
68
69 /* Specialized optimizations */
70
71 // recip (half or native)
72 bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
73
74 // divide (half or native)
75 bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
76
77 // pow/powr/pown
78 bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
79
80 // rootn
81 bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
82
83 // fma/mad
84 bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
85
86 // -fuse-native for sincos
87 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
88
89 // evaluate calls if calls' arguments are constants.
90 bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
91 double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
92 bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
93
94 // exp
95 bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
96
97 // exp2
98 bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
99
100 // exp10
101 bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
102
103 // log
104 bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
105
106 // log2
107 bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
108
109 // log10
110 bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
111
112 // sqrt
113 bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
114
115 // sin/cos
116 bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
117
118 // __read_pipe/__write_pipe
119 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
120
121 // llvm.amdgcn.wavefrontsize
122 bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
123
124 // Get insertion point at entry.
125 BasicBlock::iterator getEntryIns(CallInst * UI);
126 // Insert an Alloc instruction.
127 AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
128 // Get a scalar native builtin single argument FP function
129 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
130
131protected:
132 CallInst *CI;
133
134 bool isUnsafeMath(const CallInst *CI) const;
135
136 void replaceCall(Value *With) {
137 CI->replaceAllUsesWith(With);
138 CI->eraseFromParent();
139 }
140
141public:
142 AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
143
144 bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
145
146 void initNativeFuncs();
147
148 // Replace a normal math function call with its native version
149 bool useNative(CallInst *CI);
150};
151
152} // end llvm namespace
153
154namespace {
155
156 class AMDGPUSimplifyLibCalls : public FunctionPass {
157
158 AMDGPULibCalls Simplifier;
159
160 public:
161 static char ID; // Pass identification
162
163 AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
164 : FunctionPass(ID), Simplifier(TM) {
165 initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
166 }
167
168 void getAnalysisUsage(AnalysisUsage &AU) const override {
169 AU.addRequired<AAResultsWrapperPass>();
170 }
171
172 bool runOnFunction(Function &M) override;
173 };
174
175 class AMDGPUUseNativeCalls : public FunctionPass {
176
177 AMDGPULibCalls Simplifier;
178
179 public:
180 static char ID; // Pass identification
181
182 AMDGPUUseNativeCalls() : FunctionPass(ID) {
183 initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
184 Simplifier.initNativeFuncs();
185 }
186
187 bool runOnFunction(Function &F) override;
188 };
189
190} // end anonymous namespace.
191
192char AMDGPUSimplifyLibCalls::ID = 0;
193char AMDGPUUseNativeCalls::ID = 0;
194
195INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
196 "Simplify well-known AMD library calls", false, false)
197INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
198INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
199 "Simplify well-known AMD library calls", false, false)
200
201INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
202 "Replace builtin math calls with that native versions.",
203 false, false)
204
205template <typename IRB>
206static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
207 const Twine &Name = "") {
208 CallInst *R = B.CreateCall(Callee, Arg, Name);
209 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
210 R->setCallingConv(F->getCallingConv());
211 return R;
212}
213
214template <typename IRB>
215static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
216 Value *Arg2, const Twine &Name = "") {
217 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
218 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
219 R->setCallingConv(F->getCallingConv());
220 return R;
221}
222
223// Data structures for table-driven optimizations.
224// FuncTbl works for both f32 and f64 functions with 1 input argument
225
226struct TableEntry {
227 double result;
228 double input;
229};
230
231/* a list of {result, input} */
232static const TableEntry tbl_acos[] = {
233 {MATH_PI / 2.0, 0.0},
234 {MATH_PI / 2.0, -0.0},
235 {0.0, 1.0},
236 {MATH_PI, -1.0}
237};
238static const TableEntry tbl_acosh[] = {
239 {0.0, 1.0}
240};
241static const TableEntry tbl_acospi[] = {
242 {0.5, 0.0},
243 {0.5, -0.0},
244 {0.0, 1.0},
245 {1.0, -1.0}
246};
247static const TableEntry tbl_asin[] = {
248 {0.0, 0.0},
249 {-0.0, -0.0},
250 {MATH_PI / 2.0, 1.0},
251 {-MATH_PI / 2.0, -1.0}
252};
253static const TableEntry tbl_asinh[] = {
254 {0.0, 0.0},
255 {-0.0, -0.0}
256};
257static const TableEntry tbl_asinpi[] = {
258 {0.0, 0.0},
259 {-0.0, -0.0},
260 {0.5, 1.0},
261 {-0.5, -1.0}
262};
263static const TableEntry tbl_atan[] = {
264 {0.0, 0.0},
265 {-0.0, -0.0},
266 {MATH_PI / 4.0, 1.0},
267 {-MATH_PI / 4.0, -1.0}
268};
269static const TableEntry tbl_atanh[] = {
270 {0.0, 0.0},
271 {-0.0, -0.0}
272};
273static const TableEntry tbl_atanpi[] = {
274 {0.0, 0.0},
275 {-0.0, -0.0},
276 {0.25, 1.0},
277 {-0.25, -1.0}
278};
279static const TableEntry tbl_cbrt[] = {
280 {0.0, 0.0},
281 {-0.0, -0.0},
282 {1.0, 1.0},
283 {-1.0, -1.0},
284};
285static const TableEntry tbl_cos[] = {
286 {1.0, 0.0},
287 {1.0, -0.0}
288};
289static const TableEntry tbl_cosh[] = {
290 {1.0, 0.0},
291 {1.0, -0.0}
292};
293static const TableEntry tbl_cospi[] = {
294 {1.0, 0.0},
295 {1.0, -0.0}
296};
297static const TableEntry tbl_erfc[] = {
298 {1.0, 0.0},
299 {1.0, -0.0}
300};
301static const TableEntry tbl_erf[] = {
302 {0.0, 0.0},
303 {-0.0, -0.0}
304};
305static const TableEntry tbl_exp[] = {
306 {1.0, 0.0},
307 {1.0, -0.0},
308 {MATH_E, 1.0}
309};
310static const TableEntry tbl_exp2[] = {
311 {1.0, 0.0},
312 {1.0, -0.0},
313 {2.0, 1.0}
314};
315static const TableEntry tbl_exp10[] = {
316 {1.0, 0.0},
317 {1.0, -0.0},
318 {10.0, 1.0}
319};
320static const TableEntry tbl_expm1[] = {
321 {0.0, 0.0},
322 {-0.0, -0.0}
323};
324static const TableEntry tbl_log[] = {
325 {0.0, 1.0},
326 {1.0, MATH_E}
327};
328static const TableEntry tbl_log2[] = {
329 {0.0, 1.0},
330 {1.0, 2.0}
331};
332static const TableEntry tbl_log10[] = {
333 {0.0, 1.0},
334 {1.0, 10.0}
335};
336static const TableEntry tbl_rsqrt[] = {
337 {1.0, 1.0},
338 {MATH_SQRT1_2, 2.0}
339};
340static const TableEntry tbl_sin[] = {
341 {0.0, 0.0},
342 {-0.0, -0.0}
343};
344static const TableEntry tbl_sinh[] = {
345 {0.0, 0.0},
346 {-0.0, -0.0}
347};
348static const TableEntry tbl_sinpi[] = {
349 {0.0, 0.0},
350 {-0.0, -0.0}
351};
352static const TableEntry tbl_sqrt[] = {
353 {0.0, 0.0},
354 {1.0, 1.0},
355 {MATH_SQRT2, 2.0}
356};
357static const TableEntry tbl_tan[] = {
358 {0.0, 0.0},
359 {-0.0, -0.0}
360};
361static const TableEntry tbl_tanh[] = {
362 {0.0, 0.0},
363 {-0.0, -0.0}
364};
365static const TableEntry tbl_tanpi[] = {
366 {0.0, 0.0},
367 {-0.0, -0.0}
368};
369static const TableEntry tbl_tgamma[] = {
370 {1.0, 1.0},
371 {1.0, 2.0},
372 {2.0, 3.0},
373 {6.0, 4.0}
374};
375
376static bool HasNative(AMDGPULibFunc::EFuncId id) {
377 switch(id) {
378 case AMDGPULibFunc::EI_DIVIDE:
379 case AMDGPULibFunc::EI_COS:
380 case AMDGPULibFunc::EI_EXP:
381 case AMDGPULibFunc::EI_EXP2:
382 case AMDGPULibFunc::EI_EXP10:
383 case AMDGPULibFunc::EI_LOG:
384 case AMDGPULibFunc::EI_LOG2:
385 case AMDGPULibFunc::EI_LOG10:
386 case AMDGPULibFunc::EI_POWR:
387 case AMDGPULibFunc::EI_RECIP:
388 case AMDGPULibFunc::EI_RSQRT:
389 case AMDGPULibFunc::EI_SIN:
390 case AMDGPULibFunc::EI_SINCOS:
391 case AMDGPULibFunc::EI_SQRT:
392 case AMDGPULibFunc::EI_TAN:
393 return true;
394 default:;
395 }
396 return false;
397}
398
399struct TableRef {
400 size_t size;
401 const TableEntry *table; // variable size: from 0 to (size - 1)
402
403 TableRef() : size(0), table(nullptr) {}
404
405 template <size_t N>
406 TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
407};
408
409static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
410 switch(id) {
411 case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos);
412 case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh);
413 case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi);
414 case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin);
415 case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh);
416 case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi);
417 case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan);
418 case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh);
419 case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi);
420 case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt);
421 case AMDGPULibFunc::EI_NCOS:
422 case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
423 case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh);
424 case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi);
425 case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc);
426 case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
427 case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
428 case AMDGPULibFunc::EI_NEXP2:
429 case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2);
430 case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10);
431 case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1);
432 case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
433 case AMDGPULibFunc::EI_NLOG2:
434 case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2);
435 case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10);
436 case AMDGPULibFunc::EI_NRSQRT:
437 case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt);
438 case AMDGPULibFunc::EI_NSIN:
439 case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
440 case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh);
441 case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi);
442 case AMDGPULibFunc::EI_NSQRT:
443 case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt);
444 case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
445 case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh);
446 case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi);
447 case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma);
448 default:;
449 }
450 return TableRef();
451}
452
453static inline int getVecSize(const AMDGPULibFunc& FInfo) {
454 return FInfo.getLeads()[0].VectorSize;
455}
456
457static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
458 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
459}
460
461FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
462 // If we are doing PreLinkOpt, the function is external. So it is safe to
463 // use getOrInsertFunction() at this stage.
464
465 return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
466 : AMDGPULibFunc::getFunction(M, fInfo);
467}
468
469bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
470 FuncInfo *FInfo) {
471 return AMDGPULibFunc::parse(FMangledName, *FInfo);
472}
473
474bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
475 if (auto Op = dyn_cast<FPMathOperator>(CI))
476 if (Op->isFast())
477 return true;
478 const Function *F = CI->getParent()->getParent();
479 Attribute Attr = F->getFnAttribute("unsafe-fp-math");
480 return Attr.getValueAsBool();
481}
482
483bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
484 return AllNative || llvm::is_contained(UseNative, F);
485}
486
487void AMDGPULibCalls::initNativeFuncs() {
488 AllNative = useNativeFunc("all") ||
489 (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
490 UseNative.begin()->empty());
491}
492
493bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
494 bool native_sin = useNativeFunc("sin");
495 bool native_cos = useNativeFunc("cos");
496
497 if (native_sin && native_cos) {
498 Module *M = aCI->getModule();
499 Value *opr0 = aCI->getArgOperand(0);
500
501 AMDGPULibFunc nf;
502 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
503 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
504
505 nf.setPrefix(AMDGPULibFunc::NATIVE);
506 nf.setId(AMDGPULibFunc::EI_SIN);
507 FunctionCallee sinExpr = getFunction(M, nf);
508
509 nf.setPrefix(AMDGPULibFunc::NATIVE);
510 nf.setId(AMDGPULibFunc::EI_COS);
511 FunctionCallee cosExpr = getFunction(M, nf);
512 if (sinExpr && cosExpr) {
513 Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
514 Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
515 new StoreInst(cosval, aCI->getArgOperand(1), aCI);
516
517 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
518 << " with native version of sin/cos");
519
520 replaceCall(sinval);
521 return true;
522 }
523 }
524 return false;
525}
526
527bool AMDGPULibCalls::useNative(CallInst *aCI) {
528 CI = aCI;
529 Function *Callee = aCI->getCalledFunction();
530
531 FuncInfo FInfo;
532 if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
533 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
534 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
535 !(AllNative || useNativeFunc(FInfo.getName()))) {
536 return false;
537 }
538
539 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
540 return sincosUseNative(aCI, FInfo);
541
542 FInfo.setPrefix(AMDGPULibFunc::NATIVE);
543 FunctionCallee F = getFunction(aCI->getModule(), FInfo);
544 if (!F)
545 return false;
546
547 aCI->setCalledFunction(F);
548 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
549 << " with native version");
550 return true;
551}
552
553// Clang emits a call to __read_pipe_2 or __read_pipe_4 for the OpenCL read_pipe
554// builtin, with appended type size and alignment arguments, where 2 or 4
555// indicates the original number of arguments. The library has optimized versions
556// of __read_pipe_2/__read_pipe_4 for when the type size and alignment are the
557// same power-of-2 value. This function transforms __read_pipe_2 into __read_pipe_2_N
558// for such cases, where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
559// 128). The same holds for __read_pipe_4, __write_pipe_2, and __write_pipe_4.
560bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
561 FuncInfo &FInfo) {
562 auto *Callee = CI->getCalledFunction();
563 if (!Callee->isDeclaration())
564 return false;
565
566 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
567 auto *M = Callee->getParent();
568 auto &Ctx = M->getContext();
569 std::string Name = std::string(Callee->getName());
570 auto NumArg = CI->getNumArgOperands();
571 if (NumArg != 4 && NumArg != 6)
572 return false;
573 auto *PacketSize = CI->getArgOperand(NumArg - 2);
574 auto *PacketAlign = CI->getArgOperand(NumArg - 1);
575 if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
576 return false;
577 unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
578 Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
579 if (Alignment != Size)
580 return false;
581
582 Type *PtrElemTy;
583 if (Size <= 8)
584 PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
585 else
586 PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
587 unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
588 auto PtrArg = CI->getArgOperand(PtrArgLoc);
589 unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
590 auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
591
592 SmallVector<llvm::Type *, 6> ArgTys;
593 for (unsigned I = 0; I != PtrArgLoc; ++I)
594 ArgTys.push_back(CI->getArgOperand(I)->getType());
595 ArgTys.push_back(PtrTy);
596
597 Name = Name + "_" + std::to_string(Size);
598 auto *FTy = FunctionType::get(Callee->getReturnType(),
599 ArrayRef<Type *>(ArgTys), false);
600 AMDGPULibFunc NewLibFunc(Name, FTy);
601 FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
602 if (!F)
603 return false;
604
605 auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
606 SmallVector<Value *, 6> Args;
607 for (unsigned I = 0; I != PtrArgLoc; ++I)
608 Args.push_back(CI->getArgOperand(I));
609 Args.push_back(BCast);
610
611 auto *NCI = B.CreateCall(F, Args);
612 NCI->setAttributes(CI->getAttributes());
613 CI->replaceAllUsesWith(NCI);
614 CI->dropAllReferences();
615 CI->eraseFromParent();
616
617 return true;
618}
619
620// This function returns false if no change was made and true otherwise.
621bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
622 this->CI = CI;
623 Function *Callee = CI->getCalledFunction();
624
625 // Ignore indirect calls.
626 if (Callee == 0) return false;
627
628 BasicBlock *BB = CI->getParent();
629 LLVMContext &Context = CI->getParent()->getContext();
630 IRBuilder<> B(Context);
631
632 // Set the builder to the instruction after the call.
633 B.SetInsertPoint(BB, CI->getIterator());
634
635 // Copy fast flags from the original call.
636 if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
637 B.setFastMathFlags(FPOp->getFastMathFlags());
638
639 switch (Callee->getIntrinsicID()) {
640 default:
641 break;
642 case Intrinsic::amdgcn_wavefrontsize:
643 return !EnablePreLink && fold_wavefrontsize(CI, B);
644 }
645
646 FuncInfo FInfo;
647 if (!parseFunctionName(Callee->getName(), &FInfo))
648 return false;
649
650 // Further check the number of arguments to see if they match.
651 if (CI->getNumArgOperands() != FInfo.getNumArgs())
652 return false;
653
654 if (TDOFold(CI, FInfo))
655 return true;
656
657 // Under unsafe-math, evaluate calls if possible.
658 // According to Brian Sumner, we can do this for all f32 function calls
659 // using host's double function calls.
660 if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
661 return true;
662
663 // Specialized optimizations for each function call
664 switch (FInfo.getId()) {
665 case AMDGPULibFunc::EI_RECIP:
666 // skip vector function
667 assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
668 FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
669 "recip must be an either native or half function");
670 return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
671
672 case AMDGPULibFunc::EI_DIVIDE:
673 // skip vector function
674 assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
675 FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
676 "divide must be an either native or half function");
677 return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
678
679 case AMDGPULibFunc::EI_POW:
680 case AMDGPULibFunc::EI_POWR:
681 case AMDGPULibFunc::EI_POWN:
682 return fold_pow(CI, B, FInfo);
683
684 case AMDGPULibFunc::EI_ROOTN:
685 // skip vector function
686 return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
687
688 case AMDGPULibFunc::EI_FMA:
689 case AMDGPULibFunc::EI_MAD:
690 case AMDGPULibFunc::EI_NFMA:
691 // skip vector function
692 return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
693
694 case AMDGPULibFunc::EI_SQRT:
695 return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
696 case AMDGPULibFunc::EI_COS:
697 case AMDGPULibFunc::EI_SIN:
698 if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
699 getArgType(FInfo) == AMDGPULibFunc::F64)
700 && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
701 return fold_sincos(CI, B, AA);
702
703 break;
704 case AMDGPULibFunc::EI_READ_PIPE_2:
705 case AMDGPULibFunc::EI_READ_PIPE_4:
706 case AMDGPULibFunc::EI_WRITE_PIPE_2:
707 case AMDGPULibFunc::EI_WRITE_PIPE_4:
708 return fold_read_write_pipe(CI, B, FInfo);
709
710 default:
711 break;
712 }
713
714 return false;
715}
716
717bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
718 // Table-Driven optimization
719 const TableRef tr = getOptTable(FInfo.getId());
720 if (tr.size==0)
721 return false;
722
723 int const sz = (int)tr.size;
724 const TableEntry * const ftbl = tr.table;
725 Value *opr0 = CI->getArgOperand(0);
726
727 if (getVecSize(FInfo) > 1) {
728 if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
729 SmallVector<double, 0> DVal;
730 for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
731 ConstantFP *eltval = dyn_cast<ConstantFP>(
732 CV->getElementAsConstant((unsigned)eltNo));
733 assert(eltval && "Non-FP arguments in math function!");
734 bool found = false;
735 for (int i=0; i < sz; ++i) {
736 if (eltval->isExactlyValue(ftbl[i].input)) {
737 DVal.push_back(ftbl[i].result);
738 found = true;
739 break;
740 }
741 }
742 if (!found) {
743 // This vector constant is not handled yet.
744 return false;
745 }
746 }
747 LLVMContext &context = CI->getParent()->getParent()->getContext();
748 Constant *nval;
749 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
750 SmallVector<float, 0> FVal;
751 for (unsigned i = 0; i < DVal.size(); ++i) {
752 FVal.push_back((float)DVal[i]);
753 }
754 ArrayRef<float> tmp(FVal);
755 nval = ConstantDataVector::get(context, tmp);
756 } else { // F64
757 ArrayRef<double> tmp(DVal);
758 nval = ConstantDataVector::get(context, tmp);
759 }
760 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
761 replaceCall(nval);
762 return true;
763 }
764 } else {
765 // Scalar version
766 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
767 for (int i = 0; i < sz; ++i) {
768 if (CF->isExactlyValue(ftbl[i].input)) {
769 Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
770 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
771 replaceCall(nval);
772 return true;
773 }
774 }
775 }
776 }
777
778 return false;
779}
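
As a quick illustration of the table-driven fold above: a call whose constant argument matches an "input" entry in the function's table is replaced by the paired "result" constant, and anything else is left untouched. A host-side C++ analog (illustrative only; the pass matches the mangled AMD library calls in IR, not <cmath>):

// Host-side analog of TDOFold (illustrative names and calls).
#include <cmath>

static void table_fold_examples() {
  float a = std::acos(1.0f); // tbl_acos contains {0.0, 1.0}   -> folded to 0.0f
  float b = std::exp(1.0f);  // tbl_exp contains {MATH_E, 1.0} -> folded to the constant e
  float c = std::acos(0.3f); // no table entry for 0.3         -> call left in place
  (void)a; (void)b; (void)c;
}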
780
781bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
782 Module *M = CI->getModule();
783 if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
784 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
785 !HasNative(FInfo.getId()))
786 return false;
787
788 AMDGPULibFunc nf = FInfo;
789 nf.setPrefix(AMDGPULibFunc::NATIVE);
790 if (FunctionCallee FPExpr = getFunction(M, nf)) {
791 LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
792
793 CI->setCalledFunction(FPExpr);
794
795 LLVM_DEBUG(dbgs() << *CI << '\n');
796
797 return true;
798 }
799 return false;
800}
801
802// [native_]half_recip(c) ==> 1.0/c
803bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
804 const FuncInfo &FInfo) {
805 Value *opr0 = CI->getArgOperand(0);
806 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
807 // Just create a normal div. Later, InstCombine will be able
808 // to compute the divide into a constant (avoid check float infinity
809 // or subnormal at this point).
810 Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
811 opr0,
812 "recip2div");
813 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
814 replaceCall(nval);
815 return true;
816 }
817 return false;
818}
819
820// [native_]half_divide(x, c) ==> x/c
821bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
822 const FuncInfo &FInfo) {
823 Value *opr0 = CI->getArgOperand(0);
824 Value *opr1 = CI->getArgOperand(1);
825 ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
826 ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
827
828 if ((CF0 && CF1) || // both are constants
829 (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
830 // CF1 is constant && f32 divide
831 {
832 Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
833 opr1, "__div2recip");
834 Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
835 replaceCall(nval);
836 return true;
837 }
838 return false;
839}
840
841namespace llvm {
842static double log2(double V) {
843#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
844 return ::log2(V);
845#else
846 return log(V) / numbers::ln2;
847#endif
848}
849}
850
851bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
852 const FuncInfo &FInfo) {
853 assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
854 FInfo.getId() == AMDGPULibFunc::EI_POWR ||
855 FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
856 "fold_pow: encounter a wrong function call");
857
858 Value *opr0, *opr1;
859 ConstantFP *CF;
860 ConstantInt *CINT;
861 ConstantAggregateZero *CZero;
862 Type *eltType;
863
864 opr0 = CI->getArgOperand(0);
865 opr1 = CI->getArgOperand(1);
866 CZero = dyn_cast<ConstantAggregateZero>(opr1);
867 if (getVecSize(FInfo) == 1) {
1. Assuming the condition is false
2. Taking false branch
868 eltType = opr0->getType();
869 CF = dyn_cast<ConstantFP>(opr1);
870 CINT = dyn_cast<ConstantInt>(opr1);
871 } else {
872 VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
3. Assuming the object is not a 'VectorType'
4. 'VTy' initialized to a null pointer value
873 assert(VTy && "Oprand of vector function should be of vectortype");
874 eltType = VTy->getElementType();
5. Called C++ object pointer is null
875 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
876
877 // For now, only handle vector constants whose elements have the same value.
878 CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
879 CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
880 }
881
882 // No unsafe math, no constant argument, do nothing
883 if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
884 return false;
885
886 // 0x1111111 means that we don't do anything for this call.
887 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
888
889 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
890 // pow/powr/pown(x, 0) == 1
891 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
892 Constant *cnval = ConstantFP::get(eltType, 1.0);
893 if (getVecSize(FInfo) > 1) {
894 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
895 }
896 replaceCall(cnval);
897 return true;
898 }
899 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
900 // pow/powr/pown(x, 1.0) = x
901 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
902 replaceCall(opr0);
903 return true;
904 }
905 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
906 // pow/powr/pown(x, 2.0) = x*x
907 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
908 << "\n");
909 Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
910 replaceCall(nval);
911 return true;
912 }
913 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
914 // pow/powr/pown(x, -1.0) = 1.0/x
915 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
916 Constant *cnval = ConstantFP::get(eltType, 1.0);
917 if (getVecSize(FInfo) > 1) {
918 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
919 }
920 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
921 replaceCall(nval);
922 return true;
923 }
924
925 Module *M = CI->getModule();
926 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
927 // pow[r](x, [-]0.5) = sqrt(x)
928 bool issqrt = CF->isExactlyValue(0.5);
929 if (FunctionCallee FPExpr =
930 getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
931 : AMDGPULibFunc::EI_RSQRT,
932 FInfo))) {
933 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
934 << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
935 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
936 : "__pow2rsqrt");
937 replaceCall(nval);
938 return true;
939 }
940 }
941
942 if (!isUnsafeMath(CI))
943 return false;
944
945 // Unsafe Math optimization
946
947 // Remember that ci_opr1 is set if opr1 is integral
948 if (CF) {
949 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
950 ? (double)CF->getValueAPF().convertToFloat()
951 : CF->getValueAPF().convertToDouble();
952 int ival = (int)dval;
953 if ((double)ival == dval) {
954 ci_opr1 = ival;
955 } else
956 ci_opr1 = 0x11111111;
957 }
958
959 // pow/powr/pown(x, c) = [1/](x*x*..x); where
960 // trunc(c) == c && the number of x == c && |c| <= 12
961 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
962 if (abs_opr1 <= 12) {
963 Constant *cnval;
964 Value *nval;
965 if (abs_opr1 == 0) {
966 cnval = ConstantFP::get(eltType, 1.0);
967 if (getVecSize(FInfo) > 1) {
968 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
969 }
970 nval = cnval;
971 } else {
972 Value *valx2 = nullptr;
973 nval = nullptr;
974 while (abs_opr1 > 0) {
975 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
976 if (abs_opr1 & 1) {
977 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
978 }
979 abs_opr1 >>= 1;
980 }
981 }
982
983 if (ci_opr1 < 0) {
984 cnval = ConstantFP::get(eltType, 1.0);
985 if (getVecSize(FInfo) > 1) {
986 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
987 }
988 nval = B.CreateFDiv(cnval, nval, "__1powprod");
989 }
990 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
991 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
992 << ")\n");
993 replaceCall(nval);
994 return true;
995 }
996
997 // powr ---> exp2(y * log2(x))
998 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
999 FunctionCallee ExpExpr =
1000 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
1001 if (!ExpExpr)
1002 return false;
1003
1004 bool needlog = false;
1005 bool needabs = false;
1006 bool needcopysign = false;
1007 Constant *cnval = nullptr;
1008 if (getVecSize(FInfo) == 1) {
1009 CF = dyn_cast<ConstantFP>(opr0);
1010
1011 if (CF) {
1012 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1013 ? (double)CF->getValueAPF().convertToFloat()
1014 : CF->getValueAPF().convertToDouble();
1015
1016 V = log2(std::abs(V));
1017 cnval = ConstantFP::get(eltType, V);
1018 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
1019 CF->isNegative();
1020 } else {
1021 needlog = true;
1022 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1023 (!CF || CF->isNegative());
1024 }
1025 } else {
1026 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1027
1028 if (!CDV) {
1029 needlog = true;
1030 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
1031 } else {
1032 assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1033 "Wrong vector size detected");
1034
1035 SmallVector<double, 0> DVal;
1036 for (int i=0; i < getVecSize(FInfo); ++i) {
1037 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1038 ? (double)CDV->getElementAsFloat(i)
1039 : CDV->getElementAsDouble(i);
1040 if (V < 0.0) needcopysign = true;
1041 V = log2(std::abs(V));
1042 DVal.push_back(V);
1043 }
1044 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1045 SmallVector<float, 0> FVal;
1046 for (unsigned i=0; i < DVal.size(); ++i) {
1047 FVal.push_back((float)DVal[i]);
1048 }
1049 ArrayRef<float> tmp(FVal);
1050 cnval = ConstantDataVector::get(M->getContext(), tmp);
1051 } else {
1052 ArrayRef<double> tmp(DVal);
1053 cnval = ConstantDataVector::get(M->getContext(), tmp);
1054 }
1055 }
1056 }
1057
1058 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1059 // We cannot handle corner cases for a general pow() function, give up
1060 // unless y is a constant integral value. Then proceed as if it were pown.
1061 if (getVecSize(FInfo) == 1) {
1062 if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1063 double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1064 ? (double)CF->getValueAPF().convertToFloat()
1065 : CF->getValueAPF().convertToDouble();
1066 if (y != (double)(int64_t)y)
1067 return false;
1068 } else
1069 return false;
1070 } else {
1071 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1072 for (int i=0; i < getVecSize(FInfo); ++i) {
1073 double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1074 ? (double)CDV->getElementAsFloat(i)
1075 : CDV->getElementAsDouble(i);
1076 if (y != (double)(int64_t)y)
1077 return false;
1078 }
1079 } else
1080 return false;
1081 }
1082 }
1083
1084 Value *nval;
1085 if (needabs) {
1086 FunctionCallee AbsExpr =
1087 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
1088 if (!AbsExpr)
1089 return false;
1090 nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1091 } else {
1092 nval = cnval ? cnval : opr0;
1093 }
1094 if (needlog) {
1095 FunctionCallee LogExpr =
1096 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1097 if (!LogExpr)
1098 return false;
1099 nval = CreateCallEx(B,LogExpr, nval, "__log2");
1100 }
1101
1102 if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1103 // convert int(32) to fp(f32 or f64)
1104 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1105 }
1106 nval = B.CreateFMul(opr1, nval, "__ylogx");
1107 nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1108
1109 if (needcopysign) {
1110 Value *opr_n;
1111 Type* rTy = opr0->getType();
1112 Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1113 Type *nTy = nTyS;
1114 if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1115 nTy = FixedVectorType::get(nTyS, vTy);
1116 unsigned size = nTy->getScalarSizeInBits();
1117 opr_n = CI->getArgOperand(1);
1118 if (opr_n->getType()->isIntegerTy())
1119 opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1120 else
1121 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1122
1123 Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1124 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1125 nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1126 nval = B.CreateBitCast(nval, opr0->getType());
1127 }
1128
1129 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1130 << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1131 replaceCall(nval);
1132
1133 return true;
1134}
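
The special cases handled by fold_pow above can be summarized in a host-side C++ analog (illustrative only; the pass rewrites the OpenCL pow/powr/pown library calls in IR, pow/pown additionally restore the sign of x through the copysign bit manipulation shown above, and the last two rewrites require unsafe math):

// Host-side analog of the fold_pow rewrites (illustrative names and calls).
#include <cmath>

static float pow_fold_examples(float x, float y) {
  float a = 1.0f;                         // pow(x, 0)      -> 1
  float b = x;                            // pow(x, 1)      -> x
  float c = x * x;                        // pow(x, 2)      -> x * x
  float d = 1.0f / x;                     // pow(x, -1)     -> 1 / x
  float e = std::sqrt(x);                 // pow[r](x, 0.5) -> sqrt(x)
  float f = x * ((x * x) * (x * x));      // pown(x, 5)     -> product built by squaring (|n| <= 12)
  float g = std::exp2(y * std::log2(x));  // powr(x, y)     -> exp2(y * log2(x))
  return a + b + c + d + e + f + g;
}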
1135
1136bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1137 const FuncInfo &FInfo) {
1138 Value *opr0 = CI->getArgOperand(0);
1139 Value *opr1 = CI->getArgOperand(1);
1140
1141 ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1142 if (!CINT) {
1143 return false;
1144 }
1145 int ci_opr1 = (int)CINT->getSExtValue();
1146 if (ci_opr1 == 1) { // rootn(x, 1) = x
1147 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
1148 replaceCall(opr0);
1149 return true;
1150 }
1151 if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
1152 Module *M = CI->getModule();
1153 if (FunctionCallee FPExpr =
1154 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1155 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1156 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1157 replaceCall(nval);
1158 return true;
1159 }
1160 } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1161 Module *M = CI->getModule();
1162 if (FunctionCallee FPExpr =
1163 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1164 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1165 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1166 replaceCall(nval);
1167 return true;
1168 }
1169 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1170 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1171 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1172 opr0,
1173 "__rootn2div");
1174 replaceCall(nval);
1175 return true;
1176 } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
1177 Module *M = CI->getModule();
1178 if (FunctionCallee FPExpr =
1179 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1180 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1181 << ")\n");
1182 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1183 replaceCall(nval);
1184 return true;
1185 }
1186 }
1187 return false;
1188}
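
For reference, the rootn cases above in the same style (illustrative host-side analog; rootn(x, n) computes x^(1/n)):

// Host-side analog of the fold_rootn rewrites (illustrative names and calls).
#include <cmath>

static float rootn_fold_examples(float x) {
  float a = x;                    // rootn(x, 1)  -> x
  float b = std::sqrt(x);         // rootn(x, 2)  -> sqrt(x)
  float c = std::cbrt(x);         // rootn(x, 3)  -> cbrt(x)
  float d = 1.0f / x;             // rootn(x, -1) -> 1 / x
  float e = 1.0f / std::sqrt(x);  // rootn(x, -2) -> rsqrt(x)
  return a + b + c + d + e;
}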
1189
1190bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1191 const FuncInfo &FInfo) {
1192 Value *opr0 = CI->getArgOperand(0);
1193 Value *opr1 = CI->getArgOperand(1);
1194 Value *opr2 = CI->getArgOperand(2);
1195
1196 ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1197 ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1198 if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1199 // fma/mad(a, b, c) = c if a=0 || b=0
1200 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1201 replaceCall(opr2);
1202 return true;
1203 }
1204 if (CF0 && CF0->isExactlyValue(1.0f)) {
1205 // fma/mad(a, b, c) = b+c if a=1
1206 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1207 << "\n");
1208 Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1209 replaceCall(nval);
1210 return true;
1211 }
1212 if (CF1 && CF1->isExactlyValue(1.0f)) {
1213 // fma/mad(a, b, c) = a+c if b=1
1214 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1215 << "\n");
1216 Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1217 replaceCall(nval);
1218 return true;
1219 }
1220 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1221 if (CF->isZero()) {
1222 // fma/mad(a, b, c) = a*b if c=0
1223 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1224 << *opr1 << "\n");
1225 Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1226 replaceCall(nval);
1227 return true;
1228 }
1229 }
1230
1231 return false;
1232}
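
And the fma/mad cases above, with the same caveats (illustrative host-side analog of the constant-operand folds):

// Host-side analog of the fold_fma_mad rewrites (illustrative names).
static float fma_fold_examples(float a, float b, float c) {
  float r0 = c;      // fma(0, b, c) or fma(a, 0, c) -> c
  float r1 = b + c;  // fma(1, b, c) -> b + c
  float r2 = a + c;  // fma(a, 1, c) -> a + c
  float r3 = a * b;  // fma(a, b, 0) -> a * b
  return r0 + r1 + r2 + r3;
}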
1233
1234// Get a scalar native builtin single argument FP function
1235FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1236 const FuncInfo &FInfo) {
1237 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1238 return nullptr;
1239 FuncInfo nf = FInfo;
1240 nf.setPrefix(AMDGPULibFunc::NATIVE);
1241 return getFunction(M, nf);
1242}
1243
1244// fold sqrt -> native_sqrt (x)
1245bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1246 const FuncInfo &FInfo) {
1247 if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1248 (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1249 if (FunctionCallee FPExpr = getNativeFunction(
1250 CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1251 Value *opr0 = CI->getArgOperand(0);
1252 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1253 << "sqrt(" << *opr0 << ")\n");
1254 Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1255 replaceCall(nval);
1256 return true;
1257 }
1258 }
1259 return false;
1260}
1261
1262// fold sin, cos -> sincos.
1263bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1264 AliasAnalysis *AA) {
1265 AMDGPULibFunc fInfo;
1266 if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1267 return false;
1268
1269 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1270 fInfo.getId() == AMDGPULibFunc::EI_COS);
1271 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1272
1273 Value *CArgVal = CI->getArgOperand(0);
1274 BasicBlock * const CBB = CI->getParent();
1275
1276 int const MaxScan = 30;
1277 bool Changed = false;
1278
1279 { // fold in load value.
1280 LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1281 if (LI && LI->getParent() == CBB) {
1282 BasicBlock::iterator BBI = LI->getIterator();
1283 Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1284 if (AvailableVal) {
1285 Changed = true;
1286 CArgVal->replaceAllUsesWith(AvailableVal);
1287 if (CArgVal->getNumUses() == 0)
1288 LI->eraseFromParent();
1289 CArgVal = CI->getArgOperand(0);
1290 }
1291 }
1292 }
1293
1294 Module *M = CI->getModule();
1295 fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
1296 std::string const PairName = fInfo.mangle();
1297
1298 CallInst *UI = nullptr;
1299 for (User* U : CArgVal->users()) {
1300 CallInst *XI = dyn_cast_or_null<CallInst>(U);
1301 if (!XI || XI == CI || XI->getParent() != CBB)
1302 continue;
1303
1304 Function *UCallee = XI->getCalledFunction();
1305 if (!UCallee || !UCallee->getName().equals(PairName))
1306 continue;
1307
1308 BasicBlock::iterator BBI = CI->getIterator();
1309 if (BBI == CI->getParent()->begin())
1310 break;
1311 --BBI;
1312 for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1313 if (cast<Instruction>(BBI) == XI) {
1314 UI = XI;
1315 break;
1316 }
1317 }
1318 if (UI) break;
1319 }
1320
1321 if (!UI)
1322 return Changed;
1323
1324 // Merge the sin and cos.
1325
1326 // for OpenCL 2.0 we have only generic implementation of sincos
1327 // function.
1328 AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
1329 nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
1330 FunctionCallee Fsincos = getFunction(M, nf);
1331 if (!Fsincos)
1332 return Changed;
1333
1334 BasicBlock::iterator ItOld = B.GetInsertPoint();
1335 AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1336 B.SetInsertPoint(UI);
1337
1338 Value *P = Alloc;
1339 Type *PTy = Fsincos.getFunctionType()->getParamType(1);
1340 // The alloca instruction allocates the memory in the private address space.
1341 // This needs to be bitcast to point to the address space of the cos pointer type.
1342 // In OpenCL 2.0 this is generic, while in 1.2 it is private.
1343 if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
1344 P = B.CreateAddrSpaceCast(Alloc, PTy);
1345 CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1346
1347 LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1348 << *Call << "\n");
1349
1350 if (!isSin) { // CI->cos, UI->sin
1351 B.SetInsertPoint(&*ItOld);
1352 UI->replaceAllUsesWith(&*Call);
1353 Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1354 CI->replaceAllUsesWith(Reload);
1355 UI->eraseFromParent();
1356 CI->eraseFromParent();
1357 } else { // CI->sin, UI->cos
1358 Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1359 UI->replaceAllUsesWith(Reload);
1360 CI->replaceAllUsesWith(Call);
1361 UI->eraseFromParent();
1362 CI->eraseFromParent();
1363 }
1364 return true;
1365}
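
A source-level view of the sin/cos merge above (a hedged C++ analog; ocl_sincos is a stand-in for the OpenCL sincos builtin the pass resolves through getFunction, and in the real fold the second result travels through the alloca created in the entry block):

// Illustrative analog of fold_sincos; ocl_sincos is an assumed stand-in so the
// sketch is self-contained.
#include <cmath>

static float ocl_sincos(float x, float *cosval) {
  *cosval = std::cos(x);
  return std::sin(x);
}

static float before_fold(float x) {  // two library calls on the same operand, same block
  return std::sin(x) + std::cos(x);
}

static float after_fold(float x) {   // what the fold produces, in source terms
  float c;                           // backed by the pass-created alloca
  float s = ocl_sincos(x, &c);
  return s + c;
}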
1366
1367bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1368 if (!TM)
1369 return false;
1370
1371 StringRef CPU = TM->getTargetCPU();
1372 StringRef Features = TM->getTargetFeatureString();
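// The fold is only possible when the wavefront size is known at compile
// time: bail out for an empty/"generic" CPU with no explicit
// "wavefrontsize" target feature.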
1373 if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1374 (Features.empty() ||
1375 Features.find_insensitive("wavefrontsize") == StringRef::npos))
1376 return false;
1377
1378 Function *F = CI->getParent()->getParent();
1379 const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1380 unsigned N = ST.getWavefrontSize();
1381
1382 LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1383 << N << "\n");
1384
1385 CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1386 CI->eraseFromParent();
1387 return true;
1388}
1389
1390// Get insertion point at entry.
1391BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1392 Function * Func = UI->getParent()->getParent();
1393 BasicBlock * BB = &Func->getEntryBlock();
1394 assert(BB && "Entry block not found!");
1395 BasicBlock::iterator ItNew = BB->begin();
1396 return ItNew;
1397}
1398
1399 // Insert an AllocaInst at the beginning of the function entry block.
1400AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1401 const char *prefix) {
1402 BasicBlock::iterator ItNew = getEntryIns(UI);
1403 Function *UCallee = UI->getCalledFunction();
1404 Type *RetType = UCallee->getReturnType();
1405 B.SetInsertPoint(&*ItNew);
1406 AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
1407 std::string(prefix) + UI->getName());
1408 Alloc->setAlignment(
1409 Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
1410 return Alloc;
1411}
1412
1413bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
1414 double& Res0, double& Res1,
1415 Constant *copr0, Constant *copr1,
1416 Constant *copr2) {
1417 // By default, opr0/opr1/opr2 hold values of float/double type.
1418 // If they are not float/double, each function has to handle its
1419 // operands separately.
1420 double opr0=0.0, opr1=0.0, opr2=0.0;
1421 ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1422 ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1423 ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1424 if (fpopr0) {
1425 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1426 ? fpopr0->getValueAPF().convertToDouble()
1427 : (double)fpopr0->getValueAPF().convertToFloat();
1428 }
1429
1430 if (fpopr1) {
1431 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1432 ? fpopr1->getValueAPF().convertToDouble()
1433 : (double)fpopr1->getValueAPF().convertToFloat();
1434 }
1435
1436 if (fpopr2) {
1437 opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1438 ? fpopr2->getValueAPF().convertToDouble()
1439 : (double)fpopr2->getValueAPF().convertToFloat();
1440 }
1441
1442 switch (FInfo.getId()) {
1443 default : return false;
1444
1445 case AMDGPULibFunc::EI_ACOS:
1446 Res0 = acos(opr0);
1447 return true;
1448
1449 case AMDGPULibFunc::EI_ACOSH:
1450 // acosh(x) == log(x + sqrt(x*x - 1))
1451 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1452 return true;
1453
1454 case AMDGPULibFunc::EI_ACOSPI:
1455 Res0 = acos(opr0) / MATH_PI;
1456 return true;
1457
1458 case AMDGPULibFunc::EI_ASIN:
1459 Res0 = asin(opr0);
1460 return true;
1461
1462 case AMDGPULibFunc::EI_ASINH:
1463 // asinh(x) == log(x + sqrt(x*x + 1))
1464 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1465 return true;
1466
1467 case AMDGPULibFunc::EI_ASINPI:
1468 Res0 = asin(opr0) / MATH_PI;
1469 return true;
1470
1471 case AMDGPULibFunc::EI_ATAN:
1472 Res0 = atan(opr0);
1473 return true;
1474
1475 case AMDGPULibFunc::EI_ATANH:
1476 // atanh(x) == (log(1+x) - log(1-x))/2;
1477 Res0 = (log(1.0 + opr0) - log(1.0 - opr0))/2.0;
1478 return true;
1479
1480 case AMDGPULibFunc::EI_ATANPI:
1481 Res0 = atan(opr0) / MATH_PI;
1482 return true;
1483
1484 case AMDGPULibFunc::EI_CBRT:
1485 Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1486 return true;
1487
1488 case AMDGPULibFunc::EI_COS:
1489 Res0 = cos(opr0);
1490 return true;
1491
1492 case AMDGPULibFunc::EI_COSH:
1493 Res0 = cosh(opr0);
1494 return true;
1495
1496 case AMDGPULibFunc::EI_COSPI:
1497 Res0 = cos(MATH_PI * opr0);
1498 return true;
1499
1500 case AMDGPULibFunc::EI_EXP:
1501 Res0 = exp(opr0);
1502 return true;
1503
1504 case AMDGPULibFunc::EI_EXP2:
1505 Res0 = pow(2.0, opr0);
1506 return true;
1507
1508 case AMDGPULibFunc::EI_EXP10:
1509 Res0 = pow(10.0, opr0);
1510 return true;
1511
1512 case AMDGPULibFunc::EI_EXPM1:
1513 Res0 = exp(opr0) - 1.0;
1514 return true;
1515
1516 case AMDGPULibFunc::EI_LOG:
1517 Res0 = log(opr0);
1518 return true;
1519
1520 case AMDGPULibFunc::EI_LOG2:
1521 Res0 = log(opr0) / log(2.0);
1522 return true;
1523
1524 case AMDGPULibFunc::EI_LOG10:
1525 Res0 = log(opr0) / log(10.0);
1526 return true;
1527
1528 case AMDGPULibFunc::EI_RSQRT:
1529 Res0 = 1.0 / sqrt(opr0);
1530 return true;
1531
1532 case AMDGPULibFunc::EI_SIN:
1533 Res0 = sin(opr0);
1534 return true;
1535
1536 case AMDGPULibFunc::EI_SINH:
1537 Res0 = sinh(opr0);
1538 return true;
1539
1540 case AMDGPULibFunc::EI_SINPI:
1541 Res0 = sin(MATH_PI * opr0);
1542 return true;
1543
1544 case AMDGPULibFunc::EI_SQRT:
1545 Res0 = sqrt(opr0);
1546 return true;
1547
1548 case AMDGPULibFunc::EI_TAN:
1549 Res0 = tan(opr0);
1550 return true;
1551
1552 case AMDGPULibFunc::EI_TANH:
1553 Res0 = tanh(opr0);
1554 return true;
1555
1556 case AMDGPULibFunc::EI_TANPI:
1557 Res0 = tan(MATH_PI * opr0);
1558 return true;
1559
1560 case AMDGPULibFunc::EI_RECIP:
1561 Res0 = 1.0 / opr0;
1562 return true;
1563
1564 // two-arg functions
1565 case AMDGPULibFunc::EI_DIVIDE:
1566 Res0 = opr0 / opr1;
1567 return true;
1568
1569 case AMDGPULibFunc::EI_POW:
1570 case AMDGPULibFunc::EI_POWR:
1571 Res0 = pow(opr0, opr1);
1572 return true;
1573
1574 case AMDGPULibFunc::EI_POWN: {
1575 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1576 double val = (double)iopr1->getSExtValue();
1577 Res0 = pow(opr0, val);
1578 return true;
1579 }
1580 return false;
1581 }
1582
1583 case AMDGPULibFunc::EI_ROOTN: {
1584 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1585 double val = (double)iopr1->getSExtValue();
1586 Res0 = pow(opr0, 1.0 / val);
1587 return true;
1588 }
1589 return false;
1590 }
1591
1592 // with ptr arg
1593 case AMDGPULibFunc::EI_SINCOS:
1594 Res0 = sin(opr0);
1595 Res1 = cos(opr0);
1596 return true;
1597
1598 // three-arg functions
1599 case AMDGPULibFunc::EI_FMA:
1600 case AMDGPULibFunc::EI_MAD:
1601 Res0 = opr0 * opr1 + opr2;
1602 return true;
1603 }
1604
1605 return false;
1606}
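// For reference, with float/double operands the cases above fold, e.g.,
//   sin(1.5707963267948966 /* pi/2 */)  to ~1.0,
//   pown(2.0, 10)                       to 1024.0, and
//   sincos(0.0)                         to Res0 = 0.0, Res1 = 1.0.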
1607
1608bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
1609 int numArgs = (int)aCI->getNumArgOperands();
1610 if (numArgs > 3)
1611 return false;
1612
1613 Constant *copr0 = nullptr;
1614 Constant *copr1 = nullptr;
1615 Constant *copr2 = nullptr;
1616 if (numArgs > 0) {
1617 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1618 return false;
1619 }
1620
1621 if (numArgs > 1) {
1622 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1623 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1624 return false;
1625 }
1626 }
1627
1628 if (numArgs > 2) {
1629 if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1630 return false;
1631 }
1632
1633 // At this point, all arguments to aCI are constants.
1634
1635 // max vector size is 16, and sincos will generate two results.
1636 double DVal0[16], DVal1[16];
1637 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1638 if (getVecSize(FInfo) == 1) {
1639 if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1640 DVal1[0], copr0, copr1, copr2)) {
1641 return false;
1642 }
1643 } else {
1644 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1645 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1646 ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1647 for (int i=0; i < getVecSize(FInfo); ++i) {
1648 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1649 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1650 Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1651 if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1652 DVal1[i], celt0, celt1, celt2)) {
1653 return false;
1654 }
1655 }
1656 }
1657
1658 LLVMContext &context = CI->getParent()->getParent()->getContext();
1659 Constant *nval0, *nval1;
1660 if (getVecSize(FInfo) == 1) {
1661 nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
1662 if (hasTwoResults)
1663 nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1664 } else {
1665 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1666 SmallVector <float, 0> FVal0, FVal1;
1667 for (int i=0; i < getVecSize(FInfo); ++i)
1668 FVal0.push_back((float)DVal0[i]);
1669 ArrayRef<float> tmp0(FVal0);
1670 nval0 = ConstantDataVector::get(context, tmp0);
1671 if (hasTwoResults) {
1672 for (int i=0; i < getVecSize(FInfo); ++i)
1673 FVal1.push_back((float)DVal1[i]);
1674 ArrayRef<float> tmp1(FVal1);
1675 nval1 = ConstantDataVector::get(context, tmp1);
1676 }
1677 } else {
1678 ArrayRef<double> tmp0(DVal0, getVecSize(FInfo));
1679 nval0 = ConstantDataVector::get(context, tmp0);
1680 if (hasTwoResults) {
1681 ArrayRef<double> tmp1(DVal1, getVecSize(FInfo));
1682 nval1 = ConstantDataVector::get(context, tmp1);
1683 }
1684 }
1685 }
1686
1687 if (hasTwoResults) {
1688 // sincos
1689 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1690 "math function with ptr arg not supported yet");
1691 new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1692 }
1693
1694 replaceCall(nval0);
1695 return true;
1696}
1697
1698// Public interface to the Simplify LibCalls pass.
1699FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
1700 return new AMDGPUSimplifyLibCalls(TM);
1701}
1702
1703FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
1704 return new AMDGPUUseNativeCalls();
1705}
1706
1707bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
1708 if (skipFunction(F))
1709 return false;
1710
1711 bool Changed = false;
1712 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1713
1714 LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1715 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1716
1717 for (auto &BB : F) {
1718 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1719 // Ignore non-calls.
1720 CallInst *CI = dyn_cast<CallInst>(I);
1721 ++I;
1722 // Ignore intrinsics that do not become real instructions.
1723 if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1724 continue;
1725
1726 // Ignore indirect calls.
1727 Function *Callee = CI->getCalledFunction();
1728 if (Callee == 0) continue;
1729
1730 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1731 dbgs().flush());
1732 if (Simplifier.fold(CI, AA))
1733 Changed = true;
1734 }
1735 }
1736 return Changed;
1737}
1738
1739PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
1740 FunctionAnalysisManager &AM) {
1741 AMDGPULibCalls Simplifier(&TM);
1742 Simplifier.initNativeFuncs();
1743
1744 bool Changed = false;
1745 auto AA = &AM.getResult<AAManager>(F);
1746
1747 LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1748 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1749
1750 for (auto &BB : F) {
1751 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1752 // Ignore non-calls.
1753 CallInst *CI = dyn_cast<CallInst>(I);
1754 ++I;
1755 // Ignore intrinsics that do not become real instructions.
1756 if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1757 continue;
1758
1759 // Ignore indirect calls.
1760 Function *Callee = CI->getCalledFunction();
1761 if (Callee == 0)
1762 continue;
1763
1764 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1765 dbgs().flush());
1766 if (Simplifier.fold(CI, AA))
1767 Changed = true;
1768 }
1769 }
1770 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1771}
1772
1773bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
1774 if (skipFunction(F) || UseNative.empty())
1775 return false;
1776
1777 bool Changed = false;
1778 for (auto &BB : F) {
1779 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1780 // Ignore non-calls.
1781 CallInst *CI = dyn_cast<CallInst>(I);
1782 ++I;
1783 if (!CI) continue;
1784
1785 // Ignore indirect calls.
1786 Function *Callee = CI->getCalledFunction();
1787 if (Callee == 0) continue;
1788
1789 if (Simplifier.useNative(CI))
1790 Changed = true;
1791 }
1792 }
1793 return Changed;
1794}
1795
1796PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
1797 FunctionAnalysisManager &AM) {
1798 if (UseNative.empty())
1799 return PreservedAnalyses::all();
1800
1801 AMDGPULibCalls Simplifier;
1802 Simplifier.initNativeFuncs();
1803
1804 bool Changed = false;
1805 for (auto &BB : F) {
1806 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1807 // Ignore non-calls.
1808 CallInst *CI = dyn_cast<CallInst>(I);
1809 ++I;
1810 if (!CI)
1811 continue;
1812
1813 // Ignore indirect calls.
1814 Function *Callee = CI->getCalledFunction();
1815 if (Callee == 0)
1816 continue;
1817
1818 if (Simplifier.useNative(CI))
1819 Changed = true;
1820 }
1821 }
1822 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1823}