Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Warning: line 1078, column 20
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name OMPIRBuilder.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
1//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file implements the OpenMPIRBuilder class, which is used as a
11/// convenient way to create LLVM instructions for OpenMP directives.
12///
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
16
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/Triple.h"
19#include "llvm/IR/CFG.h"
20#include "llvm/IR/DebugInfo.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/MDBuilder.h"
23#include "llvm/IR/Value.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/Error.h"
26#include "llvm/Transforms/Utils/BasicBlockUtils.h"
27#include "llvm/Transforms/Utils/CodeExtractor.h"
28
29#include <sstream>
30
31#define DEBUG_TYPE"openmp-ir-builder" "openmp-ir-builder"
32
33using namespace llvm;
34using namespace omp;
35
/// Hidden boolean command-line option
/// "openmp-ir-builder-optimistic-attributes"; it is initialised to false.
/// NOTE(review): no use of this flag is visible in this part of the listing;
/// presumably it is read by the attribute sets in OMPKinds.def — confirm.
36static cl::opt<bool>
37 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
38 cl::desc("Use optimistic attributes describing "
39 "'as-if' properties of runtime calls."),
40 cl::init(false));
41
/// Extends the attributes of \p Fn with the attribute sets that the
/// OMP_RTL_ATTRS entries of OMPKinds.def associate with \p FnID.
///
/// The function, return and per-parameter attribute sets currently on \p Fn
/// are read first. A matching case adds to each of them and writes the merged
/// AttributeList back to \p Fn; when no case matches, \p Fn is left untouched.
///
/// \param FnID Runtime function identifier that selects the switch case.
/// \param Fn   Function whose attribute list is updated in place.
42void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
43 LLVMContext &Ctx = Fn.getContext();
44
45 // Get the function's current attributes.
46 auto Attrs = Fn.getAttributes();
47 auto FnAttrs = Attrs.getFnAttributes();
48 auto RetAttrs = Attrs.getRetAttributes();
49 SmallVector<AttributeSet, 4> ArgAttrs;
50 for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
51 ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo));
52
// Every OMP_ATTRS_SET entry of the .def file becomes a local AttributeSet
// variable that the cases below can refer to by name.
53#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
54#include "llvm/Frontend/OpenMP/OMPKinds.def"
55
56 // Add attributes to the function declaration.
// NOTE(review): ArgAttrs holds Fn.arg_size() entries but each case indexes it
// up to ArgAttrSets.size(); confirm no .def entry lists more argument
// attribute sets than the function has parameters.
57 switch (FnID) {
58#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
59 case Enum: \
60 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
61 RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
62 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
63 ArgAttrs[ArgNo] = \
64 ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
65 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
66 break;
67#include "llvm/Frontend/OpenMP/OMPKinds.def"
68 default:
69 // Attributes are optional.
70 break;
71 }
72}
73
/// Returns a callee for the runtime function \p FnID in module \p M, declaring
/// the function first when the module has no function of that name.
///
/// The expected FunctionType is rebuilt from the matching OMP_RTL entry on
/// every call. A newly created declaration gets external linkage and the
/// attributes from addAttributes(); for __kmpc_fork_call and
/// __kmpc_fork_teams it also gets !callback metadata if it has none yet. A
/// function already present in the module is reused without modification.
///
/// \param M    Module that is searched and, if needed, receives the declaration.
/// \param FnID Runtime function to look up.
/// \returns {FnTy, the function bitcast to a pointer to FnTy}.
74FunctionCallee
75OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
76 FunctionType *FnTy = nullptr;
77 Function *Fn = nullptr;
78
79 // Try to find the declaration in the module first.
80 switch (FnID) {
81#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
82 case Enum: \
83 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
84 IsVarArg); \
85 Fn = M.getFunction(Str); \
86 break;
87#include "llvm/Frontend/OpenMP/OMPKinds.def"
88 }
89
90 if (!Fn) {
91 // Create a new declaration if we need one.
92 switch (FnID) {
93#define OMP_RTL(Enum, Str, ...) \
94 case Enum: \
95 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
96 break;
97#include "llvm/Frontend/OpenMP/OMPKinds.def"
98 }
99
100 // Add information if the runtime function takes a callback function
101 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
102 if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
103 LLVMContext &Ctx = Fn->getContext();
104 MDBuilder MDB(Ctx);
105 // Annotate the callback behavior of the runtime function:
106 // - The callback callee is argument number 2 (microtask).
107 // - The first two arguments of the callback callee are unknown (-1).
108 // - All variadic arguments to the runtime function are passed to the
109 // callback callee.
110 Fn->addMetadata(
111 LLVMContext::MD_callback,
112 *MDNode::get(Ctx, {MDB.createCallbackEncoding(
113 2, {-1, -1}, /* VarArgsArePassed */ true)}));
114 }
115 }
116
117 LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()do { } while (false)
118 << " with type " << *Fn->getFunctionType() << "\n")do { } while (false);
119 addAttributes(FnID, *Fn);
120
121 } else {
122 LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()do { } while (false)
123 << " with type " << *Fn->getFunctionType() << "\n")do { } while (false);
124 }
125
// NOTE(review): Fn and FnTy start out as nullptr and neither switch above has
// a default case. In this listing the assert expands to ((void)0), so an FnID
// without an OMP_RTL entry would reach the dereferences of Fn and FnTy
// unchecked — confirm the .def list covers every RuntimeFunction value.
126 assert(Fn && "Failed to create OpenMP runtime function")((void)0);
127
128 // Cast the function to the expected type if necessary
129 Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
130 return {FnTy, C};
131}
132
/// Looks up (or declares) the runtime function \p FnID in this builder's
/// module M via getOrCreateRuntimeFunction() and returns its callee as a
/// Function pointer.
///
/// NOTE(review): the assert expands to ((void)0) in this listing, so if the
/// callee is not a llvm::Function the dyn_cast result is returned unchecked —
/// callers appear to use the result without a null test; confirm.
133Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
134 FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
135 auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
136 assert(Fn && "Failed to create OpenMP runtime function pointer")((void)0);
137 return Fn;
138}
139
/// Sets up the builder by calling initializeTypes() on its module M.
140void OpenMPIRBuilder::initialize() { initializeTypes(M); }
141
/// Outlines the pending regions recorded in OutlineInfos.
///
/// For every processed entry the region's blocks are collected and extracted
/// into a new function with CodeExtractor, the new function is placed right
/// after the function it was taken from, the extractor's artificial entry
/// block is removed so that OI.EntryBB is first, and OI.PostOutlineCB (if set)
/// is run on the outlined function.
///
/// \param Fn If non-null, only entries whose OI.getFunction() equals \p Fn are
///           processed; all others are kept in OutlineInfos for a later call.
///           If null, every entry is processed.
/// \param AllowExtractorSinking When true, the non-terminator instructions of
///           the artificial entry block are moved into OI.EntryBB before the
///           artificial block is erased.
142void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
143 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
144 SmallVector<BasicBlock *, 32> Blocks;
145 SmallVector<OutlineInfo, 16> DeferredOutlines;
146 for (OutlineInfo &OI : OutlineInfos) {
147 // Skip functions that have not finalized yet; may happen with nested
148 // function generation.
149 if (Fn && OI.getFunction() != Fn) {
150 DeferredOutlines.push_back(OI);
151 continue;
152 }
153
154 ParallelRegionBlockSet.clear();
155 Blocks.clear();
156 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
157
158 Function *OuterFn = OI.getFunction();
159 CodeExtractorAnalysisCache CEAC(*OuterFn);
160 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
161 /* AggregateArgs */ false,
162 /* BlockFrequencyInfo */ nullptr,
163 /* BranchProbabilityInfo */ nullptr,
164 /* AssumptionCache */ nullptr,
165 /* AllowVarArgs */ true,
166 /* AllowAlloca */ true,
167 /* Suffix */ ".omp_par");
168
169 LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n")do { } while (false);
170 LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()do { } while (false)
171 << " Exit: " << OI.ExitBB->getName() << "\n")do { } while (false);
172 assert(Extractor.isEligible() &&((void)0)
173 "Expected OpenMP outlining to be possible!")((void)0);
174
// NOTE(review): OutlinedFn is dereferenced below before the
// assert(OutlinedFn && ...) further down, and both asserts expand to
// ((void)0) in this listing — confirm extractCodeRegion cannot return null
// for these regions.
175 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
176
177 LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n")do { } while (false);
178 LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n")do { } while (false);
179 assert(OutlinedFn->getReturnType()->isVoidTy() &&((void)0)
180 "OpenMP outlined functions should not return a value!")((void)0);
181
182 // For compatibility with the clang CG we move the outlined function after the
183 // one with the parallel region.
184 OutlinedFn->removeFromParent();
185 M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
186
187 // Remove the artificial entry introduced by the extractor right away, we
188 // made our own entry block after all.
189 {
190 BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
191 assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB)((void)0);
192 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry)((void)0);
193 if (AllowExtractorSinking) {
194 // Move instructions from the to-be-deleted ArtificialEntry to the entry
195 // basic block of the parallel region. CodeExtractor may have sunk
196 // allocas/bitcasts for values that are solely used in the outlined
197 // region and do not escape.
198 assert(!ArtificialEntry.empty() &&((void)0)
199 "Expected instructions to sink in the outlined region")((void)0);
// The iterator is advanced before the instruction is moved so that moving I
// out of ArtificialEntry does not disturb the traversal.
200 for (BasicBlock::iterator It = ArtificialEntry.begin(),
201 End = ArtificialEntry.end();
202 It != End;) {
203 Instruction &I = *It;
204 It++;
205
206 if (I.isTerminator())
207 continue;
208
209 I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
210 }
211 }
212 OI.EntryBB->moveBefore(&ArtificialEntry);
213 ArtificialEntry.eraseFromParent();
214 }
215 assert(&OutlinedFn->getEntryBlock() == OI.EntryBB)((void)0);
216 assert(OutlinedFn && OutlinedFn->getNumUses() == 1)((void)0);
217
218 // Run a user callback, e.g. to add attributes.
219 if (OI.PostOutlineCB)
220 OI.PostOutlineCB(*OutlinedFn);
221 }
222
223 // Remove work items that have been completed.
224 OutlineInfos = std::move(DeferredOutlines);
225}
226
/// Destructor. Its only statement is an assertion that OutlineInfos is empty;
/// in this listing the assert expands to ((void)0), so nothing is checked.
227OpenMPIRBuilder::~OpenMPIRBuilder() {
228 assert(OutlineInfos.empty() && "There must be no outstanding outlinings")((void)0);
229}
230
/// Returns a value of type IdentPtr that refers to a constant global built
/// from \p SrcLocStr, \p LocFlags and \p Reserve2Flags, creating it on first
/// use and caching it in IdentMap.
///
/// OMP_IDENT_FLAG_KMPC is always OR-ed into \p LocFlags. On a cache miss the
/// module's globals are scanned for one of type IdentPtr with an identical
/// initializer; only if none exists is a new private, unnamed_addr constant
/// with 8-byte alignment created.
///
/// \param SrcLocStr     Source location string stored as the last struct field.
/// \param LocFlags      Flags stored in the second field (after adding KMPC).
/// \param Reserve2Flags Value stored in the third field.
231Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
232 IdentFlag LocFlags,
233 unsigned Reserve2Flags) {
234 // Enable "C-mode".
235 LocFlags |= OMP_IDENT_FLAG_KMPC;
236
// NOTE(review): the cache key shifts LocFlags by 31 bits, so bit 0 of
// LocFlags shares bit 31 with the unsigned Reserve2Flags — confirm a shift of
// 31 rather than 32 is intended.
237 Value *&Ident =
238 IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
239 if (!Ident) {
240 Constant *I32Null = ConstantInt::getNullValue(Int32);
241 Constant *IdentData[] = {
242 I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
243 ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
244 Constant *Initializer = ConstantStruct::get(
245 cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
246
247 // Look for existing encoding of the location + flags, not needed but
248 // minimizes the difference to the existing solution while we transition.
249 for (GlobalVariable &GV : M.getGlobalList())
250 if (GV.getType() == IdentPtr && GV.hasInitializer())
251 if (GV.getInitializer() == Initializer)
252 return Ident = &GV;
253
254 auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
255 /* isConstant = */ true,
256 GlobalValue::PrivateLinkage, Initializer);
257 GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
258 GV->setAlignment(Align(8));
259 Ident = GV;
260 }
261 return Builder.CreatePointerCast(Ident, IdentPtr);
262}
263
/// Returns the integer type used for lane masks: a 64-bit integer when the
/// module's target triple is AMDGCN, a 32-bit integer otherwise.
264Type *OpenMPIRBuilder::getLanemaskType() {
265 LLVMContext &Ctx = M.getContext();
266 Triple triple(M.getTargetTriple());
267
268 // This test is adequate until deviceRTL has finer grained lane widths
269 return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
270}
271
/// Returns a constant for the source location string \p LocStr, cached per
/// string in SrcLocStrMap.
///
/// On a cache miss the module's globals are scanned for a constant whose
/// initializer equals the string data; if one is found it is returned cast to
/// Int8Ptr. Otherwise a new global string is created through the builder.
///
/// \param LocStr Text of the location string.
272Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
273 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
274 if (!SrcLocStr) {
275 Constant *Initializer =
276 ConstantDataArray::getString(M.getContext(), LocStr);
277
278 // Look for existing encoding of the location, not needed but minimizes the
279 // difference to the existing solution while we transition.
280 for (GlobalVariable &GV : M.getGlobalList())
281 if (GV.isConstant() && GV.hasInitializer() &&
282 GV.getInitializer() == Initializer)
283 return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
284
285 SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
286 /* AddressSpace */ 0, &M);
287 }
288 return SrcLocStr;
289}
290
/// Builds the location string ";FileName;FunctionName;Line;Column;;" and
/// returns the constant for it from the StringRef overload.
///
/// \param FunctionName Name placed in the second field.
/// \param FileName     Name placed in the first field.
/// \param Line         Line number, written in decimal.
/// \param Column       Column number, written in decimal.
291Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
292 StringRef FileName,
293 unsigned Line,
294 unsigned Column) {
295 SmallString<128> Buffer;
296 Buffer.push_back(';');
297 Buffer.append(FileName);
298 Buffer.push_back(';');
299 Buffer.append(FunctionName);
300 Buffer.push_back(';');
301 Buffer.append(std::to_string(Line));
302 Buffer.push_back(';');
303 Buffer.append(std::to_string(Column));
304 Buffer.push_back(';');
305 Buffer.push_back(';');
306 return getOrCreateSrcLocStr(Buffer.str());
307}
308
/// Returns the constant for the placeholder location string
/// ";unknown;unknown;0;0;;".
309Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
310 return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
311}
312
/// Returns the location string constant for \p Loc.
///
/// Without a debug location on \p Loc the default placeholder string is
/// returned. Otherwise the string is built from the debug location's line and
/// column, a function name (the subprogram's name, or the name of the function
/// containing Loc.IP's block when that is empty) and a file name (the module
/// name unless the DIFile provides a source value).
///
/// \param Loc Location description supplying the debug location and insert point.
313Constant *
314OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
315 DILocation *DIL = Loc.DL.get();
316 if (!DIL)
317 return getOrCreateDefaultSrcLocStr();
318 StringRef FileName = M.getName();
// NOTE(review): the value of DIFile::getSource() is used as the file name
// here — confirm it yields a name rather than the file's embedded source text.
319 if (DIFile *DIF = DIL->getFile())
320 if (Optional<StringRef> Source = DIF->getSource())
321 FileName = *Source;
// NOTE(review): getScope() and getSubprogram() are dereferenced without a
// null check — confirm both are always non-null for locations passed in.
322 StringRef Function = DIL->getScope()->getSubprogram()->getName();
323 Function =
324 !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
325 return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
326 DIL->getColumn());
327}
328
/// Emits, at the builder's current insert point, a call to the
/// __kmpc_global_thread_num runtime function with \p Ident as its argument and
/// returns the call, which is named "omp_global_thread_num".
///
/// \param Ident Ident value passed to the runtime call.
329Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
330 return Builder.CreateCall(
331 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
332 "omp_global_thread_num");
333}
334
/// Emits a barrier for directive \p DK at \p Loc.
///
/// If updateToLocation(Loc) reports failure, nothing is emitted and Loc.IP is
/// returned unchanged; otherwise the work is done by emitBarrierImpl().
///
/// \param Loc             Location at which the barrier is emitted.
/// \param DK              Directive kind forwarded to emitBarrierImpl().
/// \param ForceSimpleCall Forwarded to emitBarrierImpl().
/// \param CheckCancelFlag Forwarded to emitBarrierImpl().
/// \returns The insert point produced by emitBarrierImpl(), or Loc.IP.
335OpenMPIRBuilder::InsertPointTy
336OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
337 bool ForceSimpleCall, bool CheckCancelFlag) {
338 if (!updateToLocation(Loc))
339 return Loc.IP;
340 return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
341}
342
/// Emits the runtime call implementing a barrier at the builder's current
/// insert point and returns the insert point afterwards.
///
/// \p Kind selects the barrier flag placed in the ident argument: for,
/// sections, single and barrier each map to their own flag, any other kind to
/// OMP_IDENT_FLAG_BARRIER_IMPL. __kmpc_cancel_barrier is called instead of
/// __kmpc_barrier when \p ForceSimpleCall is false and
/// isLastFinalizationInfoCancellable(OMPD_parallel) is true; in that case a
/// cancellation check on the call's result is also emitted if
/// \p CheckCancelFlag is set.
///
/// \param Loc             Location used to build the source location string.
/// \param Kind            Directive the barrier belongs to.
/// \param ForceSimpleCall When true, always call __kmpc_barrier.
/// \param CheckCancelFlag When true, test the cancel barrier's result.
343OpenMPIRBuilder::InsertPointTy
344OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
345 bool ForceSimpleCall, bool CheckCancelFlag) {
346 // Build call __kmpc_cancel_barrier(loc, thread_id) or
347 // __kmpc_barrier(loc, thread_id);
348
349 IdentFlag BarrierLocFlags;
350 switch (Kind) {
351 case OMPD_for:
352 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
353 break;
354 case OMPD_sections:
355 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
356 break;
357 case OMPD_single:
358 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
359 break;
360 case OMPD_barrier:
361 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
362 break;
363 default:
364 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
365 break;
366 }
367
// The barrier flags go only into the first argument; the thread id is obtained
// from an ident created with SrcLocStr alone.
368 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
369 Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
370 getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
371
372 // If we are in a cancellable parallel region, barriers are cancellation
373 // points.
374 // TODO: Check why we would force simple calls or to ignore the cancel flag.
375 bool UseCancelBarrier =
376 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
377
378 Value *Result =
379 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
380 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
381 : OMPRTL___kmpc_barrier),
382 Args);
383
384 if (UseCancelBarrier && CheckCancelFlag)
385 emitCancelationCheckImpl(Result, OMPD_parallel);
386
387 return Builder.saveIP();
388}
389
/// Emits a call to __kmpc_cancel for \p CanceledDirective at \p Loc, followed
/// by the shared cancellation check on its result.
///
/// If updateToLocation(Loc) reports failure, nothing is emitted and Loc.IP is
/// returned. A temporary unreachable instruction serves as terminator while
/// blocks are split and is erased before returning. With a non-null
/// \p IfCondition the block is split into then/else parts and the cancel call
/// is emitted in the "then" part. When the cancelled directive is
/// OMPD_parallel, the exit callback additionally emits a barrier in the
/// cancellation block.
///
/// \param Loc               Location at which the cancel is emitted.
/// \param IfCondition       Optional condition guarding the cancel; may be null.
/// \param CanceledDirective Directive being cancelled; it must match an
///                          OMP_CANCEL_KIND entry, other values reach
///                          llvm_unreachable.
/// \returns The builder's insert point after the emitted code.
390OpenMPIRBuilder::InsertPointTy
391OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
392 Value *IfCondition,
393 omp::Directive CanceledDirective) {
394 if (!updateToLocation(Loc))
395 return Loc.IP;
396
397 // LLVM utilities like blocks with terminators.
398 auto *UI = Builder.CreateUnreachable();
399
// Without an if-condition ThenTI stays the unreachable itself; ElseTI is only
// filled in by the split and is not used afterwards in this function.
400 Instruction *ThenTI = UI, *ElseTI = nullptr;
401 if (IfCondition)
402 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
403 Builder.SetInsertPoint(ThenTI);
404
405 Value *CancelKind = nullptr;
406 switch (CanceledDirective) {
407#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
408 case DirectiveEnum: \
409 CancelKind = Builder.getInt32(Value); \
410 break;
411#include "llvm/Frontend/OpenMP/OMPKinds.def"
412 default:
413 llvm_unreachable("Unknown cancel kind!")__builtin_unreachable();
414 }
415
416 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
417 Value *Ident = getOrCreateIdent(SrcLocStr);
418 Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
419 Value *Result = Builder.CreateCall(
420 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
// The insert-point guard restores the builder's position once the barrier has
// been emitted at IP.
421 auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
422 if (CanceledDirective == OMPD_parallel) {
423 IRBuilder<>::InsertPointGuard IPG(Builder);
424 Builder.restoreIP(IP);
425 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
426 omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
427 /* CheckCancelFlag */ false);
428 }
429 };
430
431 // The actual cancel logic is shared with others, e.g., cancel_barriers.
432 emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
433
434 // Update the insertion point and remove the terminator we introduced.
435 Builder.SetInsertPoint(UI->getParent());
436 UI->eraseFromParent();
437
438 return Builder.saveIP();
439}
440
/// Emits a conditional branch on \p CancelFlag at the builder's current insert
/// point: a null flag continues in a ".cont" block, anything else jumps to a
/// new ".cncl" block.
///
/// In the cancellation block \p ExitCB (if set) is invoked first, then the
/// FiniCB of the innermost FinalizationStack entry. On return the builder is
/// positioned at the start of the continuation block.
///
/// \param CancelFlag        Value tested against null to detect cancellation.
/// \param CanceledDirective Directive being cancelled; only used by the
///                          assertion, which expands to ((void)0) in this
///                          listing.
/// \param ExitCB            Optional callback run at the start of the
///                          cancellation block.
441void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
442 omp::Directive CanceledDirective,
443 FinalizeCallbackTy ExitCB) {
444 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&((void)0)
445 "Unexpected cancellation!")((void)0);
446
447 // For a cancel barrier we create two new blocks.
448 BasicBlock *BB = Builder.GetInsertBlock();
449 BasicBlock *NonCancellationBlock;
450 if (Builder.GetInsertPoint() == BB->end()) {
451 // TODO: This branch will not be needed once we moved to the
452 // OpenMPIRBuilder codegen completely.
453 NonCancellationBlock = BasicBlock::Create(
454 BB->getContext(), BB->getName() + ".cont", BB->getParent());
455 } else {
// Split at the insert point, then drop the terminator left in BB so the
// conditional branch created below becomes BB's terminator.
456 NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
457 BB->getTerminator()->eraseFromParent();
458 Builder.SetInsertPoint(BB);
459 }
460 BasicBlock *CancellationBlock = BasicBlock::Create(
461 BB->getContext(), BB->getName() + ".cncl", BB->getParent());
462
463 // Jump to them based on the return value.
464 Value *Cmp = Builder.CreateIsNull(CancelFlag);
465 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
466 /* TODO weight */ nullptr, nullptr);
467
468 // From the cancellation block we finalize all variables and go to the
469 // post finalization block that is known to the FiniCB callback.
470 Builder.SetInsertPoint(CancellationBlock);
471 if (ExitCB)
472 ExitCB(Builder.saveIP());
// NOTE(review): FinalizationStack.back() is used without an emptiness check
// and the assertion above is compiled out here — confirm every caller
// guarantees a non-empty stack.
473 auto &FI = FinalizationStack.back();
474 FI.FiniCB(Builder.saveIP());
475
476 // The continuation block is where code generation continues.
477 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
478}
479
480IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
481 const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
482 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
483 FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
484 omp::ProcBindKind ProcBind, bool IsCancellable) {
485 if (!updateToLocation(Loc))
486 return Loc.IP;
487
488 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
489 Value *Ident = getOrCreateIdent(SrcLocStr);
490 Value *ThreadID = getOrCreateThreadID(Ident);
491
492 if (NumThreads) {
493 // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
494 Value *Args[] = {
495 Ident, ThreadID,
496 Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
497 Builder.CreateCall(
498 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
499 }
500
501 if (ProcBind != OMP_PROC_BIND_default) {
502 // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
503 Value *Args[] = {
504 Ident, ThreadID,
505 ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
506 Builder.CreateCall(
507 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
508 }
509
510 BasicBlock *InsertBB = Builder.GetInsertBlock();
511 Function *OuterFn = InsertBB->getParent();
512
513 // Save the outer alloca block because the insertion iterator may get
514 // invalidated and we still need this later.
515 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
516
517 // Vector to remember instructions we used only during the modeling but which
518 // we want to delete at the end.
519 SmallVector<Instruction *, 4> ToBeDeleted;
520
521 // Change the location to the outer alloca insertion point to create and
522 // initialize the allocas we pass into the parallel region.
523 Builder.restoreIP(OuterAllocaIP);
524 AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
525 AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
526
527 // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
528 // program, otherwise we only need them for modeling purposes to get the
529 // associated arguments in the outlined function. In the former case,
530 // initialize the allocas properly, in the latter case, delete them later.
531 if (IfCondition) {
532 Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
533 Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
534 } else {
535 ToBeDeleted.push_back(TIDAddr);
536 ToBeDeleted.push_back(ZeroAddr);
537 }
538
539 // Create an artificial insertion point that will also ensure the blocks we
540 // are about to split are not degenerated.
541 auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
542
543 Instruction *ThenTI = UI, *ElseTI = nullptr;
544 if (IfCondition)
545 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
546
547 BasicBlock *ThenBB = ThenTI->getParent();
548 BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
549 BasicBlock *PRegBodyBB =
550 PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
551 BasicBlock *PRegPreFiniBB =
552 PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
553 BasicBlock *PRegExitBB =
554 PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
555
556 auto FiniCBWrapper = [&](InsertPointTy IP) {
557 // Hide "open-ended" blocks from the given FiniCB by setting the right jump
558 // target to the region exit block.
559 if (IP.getBlock()->end() == IP.getPoint()) {
560 IRBuilder<>::InsertPointGuard IPG(Builder);
561 Builder.restoreIP(IP);
562 Instruction *I = Builder.CreateBr(PRegExitBB);
563 IP = InsertPointTy(I->getParent(), I->getIterator());
564 }
565 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&((void)0)
566 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&((void)0)
567 "Unexpected insertion point for finalization call!")((void)0);
568 return FiniCB(IP);
569 };
570
571 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
572
573 // Generate the privatization allocas in the block that will become the entry
574 // of the outlined function.
575 Builder.SetInsertPoint(PRegEntryBB->getTerminator());
576 InsertPointTy InnerAllocaIP = Builder.saveIP();
577
578 AllocaInst *PrivTIDAddr =
579 Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
580 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
581
582 // Add some fake uses for OpenMP provided arguments.
583 ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
584 Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
585 "zero.addr.use");
586 ToBeDeleted.push_back(ZeroAddrUse);
587
588 // ThenBB
589 // |
590 // V
591 // PRegionEntryBB <- Privatization allocas are placed here.
592 // |
593 // V
594 // PRegionBodyBB <- BodyGen is invoked here.
595 // |
596 // V
597 // PRegPreFiniBB <- The block we will start finalization from.
598 // |
599 // V
600 // PRegionExitBB <- A common exit to simplify block collection.
601 //
602
603 LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n")do { } while (false);
604
605 // Let the caller create the body.
606 assert(BodyGenCB && "Expected body generation callback!")((void)0);
607 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
608 BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
609
610 LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n")do { } while (false);
611
612 FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
613 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
614 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
615 llvm::LLVMContext &Ctx = F->getContext();
616 MDBuilder MDB(Ctx);
617 // Annotate the callback behavior of the __kmpc_fork_call:
618 // - The callback callee is argument number 2 (microtask).
619 // - The first two arguments of the callback callee are unknown (-1).
620 // - All variadic arguments to the __kmpc_fork_call are passed to the
621 // callback callee.
622 F->addMetadata(
623 llvm::LLVMContext::MD_callback,
624 *llvm::MDNode::get(
625 Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
626 /* VarArgsArePassed */ true)}));
627 }
628 }
629
630 OutlineInfo OI;
631 OI.PostOutlineCB = [=](Function &OutlinedFn) {
632 // Add some known attributes.
633 OutlinedFn.addParamAttr(0, Attribute::NoAlias);
634 OutlinedFn.addParamAttr(1, Attribute::NoAlias);
635 OutlinedFn.addFnAttr(Attribute::NoUnwind);
636 OutlinedFn.addFnAttr(Attribute::NoRecurse);
637
638 assert(OutlinedFn.arg_size() >= 2 &&((void)0)
639 "Expected at least tid and bounded tid as arguments")((void)0);
640 unsigned NumCapturedVars =
641 OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
642
643 CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
644 CI->getParent()->setName("omp_parallel");
645 Builder.SetInsertPoint(CI);
646
647 // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
648 Value *ForkCallArgs[] = {
649 Ident, Builder.getInt32(NumCapturedVars),
650 Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
651
652 SmallVector<Value *, 16> RealArgs;
653 RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
654 RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
655
656 Builder.CreateCall(RTLFn, RealArgs);
657
658 LLVM_DEBUG(dbgs() << "With fork_call placed: "do { } while (false)
659 << *Builder.GetInsertBlock()->getParent() << "\n")do { } while (false);
660
661 InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
662
663 // Initialize the local TID stack location with the argument value.
664 Builder.SetInsertPoint(PrivTID);
665 Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
666 Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
667
668 // If no "if" clause was present we do not need the call created during
669 // outlining, otherwise we reuse it in the serialized parallel region.
670 if (!ElseTI) {
671 CI->eraseFromParent();
672 } else {
673
674 // If an "if" clause was present we are now generating the serialized
675 // version into the "else" branch.
676 Builder.SetInsertPoint(ElseTI);
677
678 // Build calls __kmpc_serialized_parallel(&Ident, GTid);
679 Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
680 Builder.CreateCall(
681 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
682 SerializedParallelCallArgs);
683
684 // OutlinedFn(&GTid, &zero, CapturedStruct);
685 CI->removeFromParent();
686 Builder.Insert(CI);
687
688 // __kmpc_end_serialized_parallel(&Ident, GTid);
689 Value *EndArgs[] = {Ident, ThreadID};
690 Builder.CreateCall(
691 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
692 EndArgs);
693
694 LLVM_DEBUG(dbgs() << "With serialized parallel region: "do { } while (false)
695 << *Builder.GetInsertBlock()->getParent() << "\n")do { } while (false);
696 }
697
698 for (Instruction *I : ToBeDeleted)
699 I->eraseFromParent();
700 };
701
702 // Adjust the finalization stack, verify the adjustment, and call the
703 // finalize function a last time to finalize values between the pre-fini
704 // block and the exit block if we left the parallel "the normal way".
705 auto FiniInfo = FinalizationStack.pop_back_val();
706 (void)FiniInfo;
707 assert(FiniInfo.DK == OMPD_parallel &&((void)0)
708 "Unexpected finalization stack state!")((void)0);
709
710 Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
711
712 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
713 FiniCB(PreFiniIP);
714
715 OI.EntryBB = PRegEntryBB;
716 OI.ExitBB = PRegExitBB;
717
718 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
719 SmallVector<BasicBlock *, 32> Blocks;
720 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
721
722 // Ensure a single exit node for the outlined region by creating one.
723 // We might have multiple incoming edges to the exit now due to finalizations,
724 // e.g., cancel calls that cause the control flow to leave the region.
725 BasicBlock *PRegOutlinedExitBB = PRegExitBB;
726 PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
727 PRegOutlinedExitBB->setName("omp.par.outlined.exit");
728 Blocks.push_back(PRegOutlinedExitBB);
729
730 CodeExtractorAnalysisCache CEAC(*OuterFn);
731 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
732 /* AggregateArgs */ false,
733 /* BlockFrequencyInfo */ nullptr,
734 /* BranchProbabilityInfo */ nullptr,
735 /* AssumptionCache */ nullptr,
736 /* AllowVarArgs */ true,
737 /* AllowAlloca */ true,
738 /* Suffix */ ".omp_par");
739
740 // Find inputs to, outputs from the code region.
741 BasicBlock *CommonExit = nullptr;
742 SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
743 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
744 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
745
746 LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n")do { } while (false);
747
748 FunctionCallee TIDRTLFn =
749 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
750
751 auto PrivHelper = [&](Value &V) {
752 if (&V == TIDAddr || &V == ZeroAddr)
753 return;
754
755 SetVector<Use *> Uses;
756 for (Use &U : V.uses())
757 if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
758 if (ParallelRegionBlockSet.count(UserI->getParent()))
759 Uses.insert(&U);
760
761 // __kmpc_fork_call expects extra arguments as pointers. If the input
762 // already has a pointer type, everything is fine. Otherwise, store the
763 // value onto stack and load it back inside the to-be-outlined region. This
764 // will ensure only the pointer will be passed to the function.
765 // FIXME: if there are more than 15 trailing arguments, they must be
766 // additionally packed in a struct.
767 Value *Inner = &V;
768 if (!V.getType()->isPointerTy()) {
769 IRBuilder<>::InsertPointGuard Guard(Builder);
770 LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n")do { } while (false);
771
772 Builder.restoreIP(OuterAllocaIP);
773 Value *Ptr =
774 Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
775
776 // Store to stack at end of the block that currently branches to the entry
777 // block of the to-be-outlined region.
778 Builder.SetInsertPoint(InsertBB,
779 InsertBB->getTerminator()->getIterator());
780 Builder.CreateStore(&V, Ptr);
781
782 // Load back next to allocations in the to-be-outlined region.
783 Builder.restoreIP(InnerAllocaIP);
784 Inner = Builder.CreateLoad(V.getType(), Ptr);
785 }
786
787 Value *ReplacementValue = nullptr;
788 CallInst *CI = dyn_cast<CallInst>(&V);
789 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
790 ReplacementValue = PrivTID;
791 } else {
792 Builder.restoreIP(
793 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
794 assert(ReplacementValue &&((void)0)
795 "Expected copy/create callback to set replacement value!")((void)0);
796 if (ReplacementValue == &V)
797 return;
798 }
799
800 for (Use *UPtr : Uses)
801 UPtr->set(ReplacementValue);
802 };
803
804 // Reset the inner alloca insertion as it will be used for loading the values
805 // wrapped into pointers before passing them into the to-be-outlined region.
806 // Configure it to insert immediately after the fake use of zero address so
807 // that they are available in the generated body and so that the
808 // OpenMP-related values (thread ID and zero address pointers) remain leading
809 // in the argument list.
810 InnerAllocaIP = IRBuilder<>::InsertPoint(
811 ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
812
813 // Reset the outer alloca insertion point to the entry of the relevant block
814 // in case it was invalidated.
815 OuterAllocaIP = IRBuilder<>::InsertPoint(
816 OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
817
818 for (Value *Input : Inputs) {
819 LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n")do { } while (false);
820 PrivHelper(*Input);
821 }
822 LLVM_DEBUG({do { } while (false)
823 for (Value *Output : Outputs)do { } while (false)
824 LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");do { } while (false)
825 })do { } while (false);
826 assert(Outputs.empty() &&((void)0)
827 "OpenMP outlining should not produce live-out values!")((void)0);
828
829 LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n")do { } while (false);
830 LLVM_DEBUG({do { } while (false)
831 for (auto *BB : Blocks)do { } while (false)
832 dbgs() << " PBR: " << BB->getName() << "\n";do { } while (false)
833 })do { } while (false);
834
835 // Register the outlined info.
836 addOutlineInfo(std::move(OI));
837
838 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
839 UI->eraseFromParent();
840
841 return AfterIP;
842}
843
844void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
845 // Build call void __kmpc_flush(ident_t *loc)
846 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
847 Value *Args[] = {getOrCreateIdent(SrcLocStr)};
848
849 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
850}
851
852void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
853 if (!updateToLocation(Loc))
854 return;
855 emitFlush(Loc);
856}
857
858void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
859 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
860 // global_tid);
861 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
862 Value *Ident = getOrCreateIdent(SrcLocStr);
863 Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
864
865 // Ignore return result until untied tasks are supported.
866 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
867 Args);
868}
869
870void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
871 if (!updateToLocation(Loc))
872 return;
873 emitTaskwaitImpl(Loc);
874}
875
876void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
877 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
878 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
879 Value *Ident = getOrCreateIdent(SrcLocStr);
880 Constant *I32Null = ConstantInt::getNullValue(Int32);
881 Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
882
883 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
884 Args);
885}
886
887void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
888 if (!updateToLocation(Loc))
889 return;
890 emitTaskyieldImpl(Loc);
891}
892
893OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
894 const LocationDescription &Loc, InsertPointTy AllocaIP,
895 ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
896 FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
897 if (!updateToLocation(Loc))
898 return Loc.IP;
899
900 auto FiniCBWrapper = [&](InsertPointTy IP) {
901 if (IP.getBlock()->end() != IP.getPoint())
902 return FiniCB(IP);
903 // This must be done otherwise any nested constructs using FinalizeOMPRegion
904 // will fail because that function requires the Finalization Basic Block to
905 // have a terminator, which is already removed by EmitOMPRegionBody.
906 // IP is currently at cancelation block.
907 // We need to backtrack to the condition block to fetch
908 // the exit block and create a branch from cancelation
909 // to exit block.
910 IRBuilder<>::InsertPointGuard IPG(Builder);
911 Builder.restoreIP(IP);
912 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
913 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
914 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
915 Instruction *I = Builder.CreateBr(ExitBB);
916 IP = InsertPointTy(I->getParent(), I->getIterator());
917 return FiniCB(IP);
918 };
919
920 FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
921
922 // Each section is emitted as a switch case
923 // Each finalization callback is handled from clang.EmitOMPSectionDirective()
924 // -> OMP.createSection() which generates the IR for each section
925 // Iterate through all sections and emit a switch construct:
926 // switch (IV) {
927 // case 0:
928 // <SectionStmt[0]>;
929 // break;
930 // ...
931 // case <NumSection> - 1:
932 // <SectionStmt[<NumSection> - 1]>;
933 // break;
934 // }
935 // ...
936 // section_loop.after:
937 // <FiniCB>;
938 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
939 auto *CurFn = CodeGenIP.getBlock()->getParent();
940 auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
941 auto *ForExitBB = CodeGenIP.getBlock()
942 ->getSinglePredecessor()
943 ->getTerminator()
944 ->getSuccessor(1);
945 SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
946 Builder.restoreIP(CodeGenIP);
947 unsigned CaseNumber = 0;
948 for (auto SectionCB : SectionCBs) {
949 auto *CaseBB = BasicBlock::Create(M.getContext(),
950 "omp_section_loop.body.case", CurFn);
951 SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
952 Builder.SetInsertPoint(CaseBB);
953 SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
954 CaseNumber++;
955 }
956 // remove the existing terminator from body BB since there can be no
957 // terminators after switch/case
958 CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
959 };
960 // Loop body ends here
961 // LowerBound, UpperBound, and STride for createCanonicalLoop
962 Type *I32Ty = Type::getInt32Ty(M.getContext());
963 Value *LB = ConstantInt::get(I32Ty, 0);
964 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
965 Value *ST = ConstantInt::get(I32Ty, 1);
966 llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
967 Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
968 LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
969 BasicBlock *LoopAfterBB = LoopInfo->getAfter();
970 Instruction *SplitPos = LoopAfterBB->getTerminator();
971 if (!isa_and_nonnull<BranchInst>(SplitPos))
972 SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
973 // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
974 // which requires a BB with branch
975 BasicBlock *ExitBB =
976 LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
977 SplitPos->eraseFromParent();
978
979 // Apply the finalization callback in LoopAfterBB
980 auto FiniInfo = FinalizationStack.pop_back_val();
981 assert(FiniInfo.DK == OMPD_sections &&((void)0)
982 "Unexpected finalization stack state!")((void)0);
983 Builder.SetInsertPoint(LoopAfterBB->getTerminator());
984 FiniInfo.FiniCB(Builder.saveIP());
985 Builder.SetInsertPoint(ExitBB);
986
987 return Builder.saveIP();
988}
989
990OpenMPIRBuilder::InsertPointTy
991OpenMPIRBuilder::createSection(const LocationDescription &Loc,
992 BodyGenCallbackTy BodyGenCB,
993 FinalizeCallbackTy FiniCB) {
994 if (!updateToLocation(Loc))
995 return Loc.IP;
996
997 auto FiniCBWrapper = [&](InsertPointTy IP) {
998 if (IP.getBlock()->end() != IP.getPoint())
999 return FiniCB(IP);
1000 // This must be done otherwise any nested constructs using FinalizeOMPRegion
1001 // will fail because that function requires the Finalization Basic Block to
1002 // have a terminator, which is already removed by EmitOMPRegionBody.
1003 // IP is currently at cancelation block.
1004 // We need to backtrack to the condition block to fetch
1005 // the exit block and create a branch from cancelation
1006 // to exit block.
1007 IRBuilder<>::InsertPointGuard IPG(Builder);
1008 Builder.restoreIP(IP);
1009 auto *CaseBB = Loc.IP.getBlock();
1010 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1011 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1012 Instruction *I = Builder.CreateBr(ExitBB);
1013 IP = InsertPointTy(I->getParent(), I->getIterator());
1014 return FiniCB(IP);
1015 };
1016
1017 Directive OMPD = Directive::OMPD_sections;
1018 // Since we are using Finalization Callback here, HasFinalize
1019 // and IsCancellable have to be true
1020 return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
1021 /*Conditional*/ false, /*hasFinalize*/ true,
1022 /*IsCancellable*/ true);
1023}
1024
1025OpenMPIRBuilder::InsertPointTy
1026OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
1027 BodyGenCallbackTy BodyGenCB,
1028 FinalizeCallbackTy FiniCB) {
1029
1030 if (!updateToLocation(Loc))
1031 return Loc.IP;
1032
1033 Directive OMPD = Directive::OMPD_master;
1034 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1035 Value *Ident = getOrCreateIdent(SrcLocStr);
1036 Value *ThreadId = getOrCreateThreadID(Ident);
1037 Value *Args[] = {Ident, ThreadId};
1038
1039 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1040 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1041
1042 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1043 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1044
1045 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1046 /*Conditional*/ true, /*hasFinalize*/ true);
1047}
1048
1049OpenMPIRBuilder::InsertPointTy
1050OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
1051 BodyGenCallbackTy BodyGenCB,
1052 FinalizeCallbackTy FiniCB, Value *Filter) {
1053 if (!updateToLocation(Loc))
1054 return Loc.IP;
1055
1056 Directive OMPD = Directive::OMPD_masked;
1057 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1058 Value *Ident = getOrCreateIdent(SrcLocStr);
1059 Value *ThreadId = getOrCreateThreadID(Ident);
1060 Value *Args[] = {Ident, ThreadId, Filter};
1061 Value *ArgsEnd[] = {Ident, ThreadId};
1062
1063 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1064 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1065
1066 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1067 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
1068
1069 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1070 /*Conditional*/ true, /*hasFinalize*/ true);
1071}
1072
1073CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
1074 DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
1075 BasicBlock *PostInsertBefore, const Twine &Name) {
1076 Module *M = F->getParent();
1077 LLVMContext &Ctx = M->getContext();
1078 Type *IndVarTy = TripCount->getType();
8
Called C++ object pointer is null
1079
1080 // Create the basic block structure.
1081 BasicBlock *Preheader =
1082 BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
1083 BasicBlock *Header =
1084 BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
1085 BasicBlock *Cond =
1086 BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
1087 BasicBlock *Body =
1088 BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
1089 BasicBlock *Latch =
1090 BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
1091 BasicBlock *Exit =
1092 BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
1093 BasicBlock *After =
1094 BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
1095
1096 // Use specified DebugLoc for new instructions.
1097 Builder.SetCurrentDebugLocation(DL);
1098
1099 Builder.SetInsertPoint(Preheader);
1100 Builder.CreateBr(Header);
1101
1102 Builder.SetInsertPoint(Header);
1103 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
1104 IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
1105 Builder.CreateBr(Cond);
1106
1107 Builder.SetInsertPoint(Cond);
1108 Value *Cmp =
1109 Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
1110 Builder.CreateCondBr(Cmp, Body, Exit);
1111
1112 Builder.SetInsertPoint(Body);
1113 Builder.CreateBr(Latch);
1114
1115 Builder.SetInsertPoint(Latch);
1116 Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
1117 "omp_" + Name + ".next", /*HasNUW=*/true);
1118 Builder.CreateBr(Header);
1119 IndVarPHI->addIncoming(Next, Latch);
1120
1121 Builder.SetInsertPoint(Exit);
1122 Builder.CreateBr(After);
1123
1124 // Remember and return the canonical control flow.
1125 LoopInfos.emplace_front();
1126 CanonicalLoopInfo *CL = &LoopInfos.front();
1127
1128 CL->Preheader = Preheader;
1129 CL->Header = Header;
1130 CL->Cond = Cond;
1131 CL->Body = Body;
1132 CL->Latch = Latch;
1133 CL->Exit = Exit;
1134 CL->After = After;
1135
1136 CL->IsValid = true;
1137
1138#ifndef NDEBUG1
1139 CL->assertOK();
1140#endif
1141 return CL;
1142}
1143
1144CanonicalLoopInfo *
1145OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
1146 LoopBodyGenCallbackTy BodyGenCB,
1147 Value *TripCount, const Twine &Name) {
1148 BasicBlock *BB = Loc.IP.getBlock();
1149 BasicBlock *NextBB = BB->getNextNode();
1150
1151 CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
1152 NextBB, NextBB, Name);
1153 BasicBlock *After = CL->getAfter();
1154
1155 // If location is not set, don't connect the loop.
1156 if (updateToLocation(Loc)) {
1157 // Split the loop at the insertion point: Branch to the preheader and move
1158 // every following instruction to after the loop (the After BB). Also, the
1159 // new successor is the loop's after block.
1160 Builder.CreateBr(CL->Preheader);
1161 After->getInstList().splice(After->begin(), BB->getInstList(),
1162 Builder.GetInsertPoint(), BB->end());
1163 After->replaceSuccessorsPhiUsesWith(BB, After);
1164 }
1165
1166 // Emit the body content. We do it after connecting the loop to the CFG to
1167 // avoid that the callback encounters degenerate BBs.
1168 BodyGenCB(CL->getBodyIP(), CL->getIndVar());
1169
1170#ifndef NDEBUG1
1171 CL->assertOK();
1172#endif
1173 return CL;
1174}
1175
1176CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
1177 const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
1178 Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
1179 InsertPointTy ComputeIP, const Twine &Name) {
1180
1181 // Consider the following difficulties (assuming 8-bit signed integers):
1182 // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1183 // DO I = 1, 100, 50
1184 /// * A \p Step of INT_MIN cannot not be normalized to a positive direction:
1185 // DO I = 100, 0, -128
1186
1187 // Start, Stop and Step must be of the same integer type.
1188 auto *IndVarTy = cast<IntegerType>(Start->getType());
1189 assert(IndVarTy == Stop->getType() && "Stop type mismatch")((void)0);
1190 assert(IndVarTy == Step->getType() && "Step type mismatch")((void)0);
1191
1192 LocationDescription ComputeLoc =
1193 ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1194 updateToLocation(ComputeLoc);
1195
1196 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1197 ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1198
1199 // Like Step, but always positive.
1200 Value *Incr = Step;
1201
1202 // Distance between Start and Stop; always positive.
1203 Value *Span;
1204
1205 // Condition whether there are no iterations are executed at all, e.g. because
1206 // UB < LB.
1207 Value *ZeroCmp;
1208
1209 if (IsSigned) {
1210 // Ensure that increment is positive. If not, negate and invert LB and UB.
1211 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1212 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1213 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1214 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1215 Span = Builder.CreateSub(UB, LB, "", false, true);
1216 ZeroCmp = Builder.CreateICmp(
1217 InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1218 } else {
1219 Span = Builder.CreateSub(Stop, Start, "", true);
1220 ZeroCmp = Builder.CreateICmp(
1221 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1222 }
1223
1224 Value *CountIfLooping;
1225 if (InclusiveStop) {
1226 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1227 } else {
1228 // Avoid incrementing past stop since it could overflow.
1229 Value *CountIfTwo = Builder.CreateAdd(
1230 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1231 Value *OneCmp = Builder.CreateICmp(
1232 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1233 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1234 }
1235 Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1236 "omp_" + Name + ".tripcount");
1237
1238 auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1239 Builder.restoreIP(CodeGenIP);
1240 Value *Span = Builder.CreateMul(IV, Step);
1241 Value *IndVar = Builder.CreateAdd(Span, Start);
1242 BodyGenCB(Builder.saveIP(), IndVar);
1243 };
1244 LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1245 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1246}
1247
1248// Returns an LLVM function to call for initializing loop bounds using OpenMP
1249// static scheduling depending on `type`. Only i32 and i64 are supported by the
1250// runtime. Always interpret integers as unsigned similarly to
1251// CanonicalLoopInfo.
1252static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
1253 OpenMPIRBuilder &OMPBuilder) {
1254 unsigned Bitwidth = Ty->getIntegerBitWidth();
1255 if (Bitwidth == 32)
1256 return OMPBuilder.getOrCreateRuntimeFunction(
1257 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1258 if (Bitwidth == 64)
1259 return OMPBuilder.getOrCreateRuntimeFunction(
1260 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1261 llvm_unreachable("unknown OpenMP loop iterator bitwidth")__builtin_unreachable();
1262}
1263
1264// Sets the number of loop iterations to the given value. This value must be
1265// valid in the condition block (i.e., defined in the preheader) and is
1266// interpreted as an unsigned integer.
1267void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
1268 Instruction *CmpI = &CLI->getCond()->front();
1269 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount")((void)0);
1270 CmpI->setOperand(1, TripCount);
1271 CLI->assertOK();
1272}
1273
1274CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
1275 const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1276 InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
1277 // Set up the source location value for OpenMP runtime.
1278 if (!updateToLocation(Loc))
1279 return nullptr;
1280
1281 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1282 Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1283
1284 // Declare useful OpenMP runtime functions.
1285 Value *IV = CLI->getIndVar();
1286 Type *IVTy = IV->getType();
1287 FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1288 FunctionCallee StaticFini =
1289 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1290
1291 // Allocate space for computed loop bounds as expected by the "init" function.
1292 Builder.restoreIP(AllocaIP);
1293 Type *I32Type = Type::getInt32Ty(M.getContext());
1294 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1295 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1296 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1297 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1298
1299 // At the end of the preheader, prepare for calling the "init" function by
1300 // storing the current loop bounds into the allocated space. A canonical loop
1301 // always iterates from 0 to trip-count with step 1. Note that "init" expects
1302 // and produces an inclusive upper bound.
1303 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1304 Constant *Zero = ConstantInt::get(IVTy, 0);
1305 Constant *One = ConstantInt::get(IVTy, 1);
1306 Builder.CreateStore(Zero, PLowerBound);
1307 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1308 Builder.CreateStore(UpperBound, PUpperBound);
1309 Builder.CreateStore(One, PStride);
1310
1311 if (!Chunk)
1312 Chunk = One;
1313
1314 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1315
1316 Constant *SchedulingType =
1317 ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
1318
1319 // Call the "init" function and update the trip count of the loop with the
1320 // value it produced.
1321 Builder.CreateCall(StaticInit,
1322 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1323 PUpperBound, PStride, One, Chunk});
1324 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
1325 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
1326 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1327 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1328 setCanonicalLoopTripCount(CLI, TripCount);
1329
1330 // Update all uses of the induction variable except the one in the condition
1331 // block that compares it with the actual upper bound, and the increment in
1332 // the latch block.
1333 // TODO: this can eventually move to CanonicalLoopInfo or to a new
1334 // CanonicalLoopInfoUpdater interface.
1335 Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1336 Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
1337 IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1338 auto *Instr = dyn_cast<Instruction>(U.getUser());
1339 return !Instr ||
1340 (Instr->getParent() != CLI->getCond() &&
1341 Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1342 });
1343
1344 // In the "exit" block, call the "fini" function.
1345 Builder.SetInsertPoint(CLI->getExit(),
1346 CLI->getExit()->getTerminator()->getIterator());
1347 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1348
1349 // Add the barrier if requested.
1350 if (NeedsBarrier)
1351 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
1352 omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1353 /* CheckCancelFlag */ false);
1354
1355 CLI->assertOK();
1356 return CLI;
1357}
1358
1359CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
1360 const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1361 InsertPointTy AllocaIP, bool NeedsBarrier) {
1362 // Currently only supports static schedules.
1363 return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
1364}
1365
1366/// Returns an LLVM function to call for initializing loop bounds using OpenMP
1367/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1368/// the runtime. Always interpret integers as unsigned similarly to
1369/// CanonicalLoopInfo.
1370static FunctionCallee
1371getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1372 unsigned Bitwidth = Ty->getIntegerBitWidth();
1373 if (Bitwidth == 32)
1374 return OMPBuilder.getOrCreateRuntimeFunction(
1375 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
1376 if (Bitwidth == 64)
1377 return OMPBuilder.getOrCreateRuntimeFunction(
1378 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
1379 llvm_unreachable("unknown OpenMP loop iterator bitwidth")__builtin_unreachable();
1380}
1381
1382/// Returns an LLVM function to call for updating the next loop using OpenMP
1383/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1384/// the runtime. Always interpret integers as unsigned similarly to
1385/// CanonicalLoopInfo.
1386static FunctionCallee
1387getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1388 unsigned Bitwidth = Ty->getIntegerBitWidth();
1389 if (Bitwidth == 32)
1390 return OMPBuilder.getOrCreateRuntimeFunction(
1391 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
1392 if (Bitwidth == 64)
1393 return OMPBuilder.getOrCreateRuntimeFunction(
1394 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
1395 llvm_unreachable("unknown OpenMP loop iterator bitwidth")__builtin_unreachable();
1396}
1397
1398OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
1399 const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1400 InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
1401 Value *Chunk) {
1402 // Set up the source location value for OpenMP runtime.
1403 Builder.SetCurrentDebugLocation(Loc.DL);
1404
1405 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1406 Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1407
1408 // Declare useful OpenMP runtime functions.
1409 Value *IV = CLI->getIndVar();
1410 Type *IVTy = IV->getType();
1411 FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
1412 FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
1413
1414 // Allocate space for computed loop bounds as expected by the "init" function.
1415 Builder.restoreIP(AllocaIP);
1416 Type *I32Type = Type::getInt32Ty(M.getContext());
1417 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1418 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1419 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1420 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1421
1422 // At the end of the preheader, prepare for calling the "init" function by
1423 // storing the current loop bounds into the allocated space. A canonical loop
1424 // always iterates from 0 to trip-count with step 1. Note that "init" expects
1425 // and produces an inclusive upper bound.
1426 BasicBlock *PreHeader = CLI->getPreheader();
1427 Builder.SetInsertPoint(PreHeader->getTerminator());
1428 Constant *One = ConstantInt::get(IVTy, 1);
1429 Builder.CreateStore(One, PLowerBound);
1430 Value *UpperBound = CLI->getTripCount();
1431 Builder.CreateStore(UpperBound, PUpperBound);
1432 Builder.CreateStore(One, PStride);
1433
1434 BasicBlock *Header = CLI->getHeader();
1435 BasicBlock *Exit = CLI->getExit();
1436 BasicBlock *Cond = CLI->getCond();
1437 InsertPointTy AfterIP = CLI->getAfterIP();
1438
1439 // The CLI will be "broken" in the code below, as the loop is no longer
1440 // a valid canonical loop.
1441
1442 if (!Chunk)
1443 Chunk = One;
1444
1445 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1446
1447 Constant *SchedulingType =
1448 ConstantInt::get(I32Type, static_cast<int>(SchedType));
1449
1450 // Call the "init" function.
1451 Builder.CreateCall(DynamicInit,
1452 {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
1453 UpperBound, /* step */ One, Chunk});
1454
1455 // An outer loop around the existing one.
1456 BasicBlock *OuterCond = BasicBlock::Create(
1457 PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
1458 PreHeader->getParent());
1459 // This needs to be 32-bit always, so can't use the IVTy Zero above.
1460 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
1461 Value *Res =
1462 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
1463 PLowerBound, PUpperBound, PStride});
1464 Constant *Zero32 = ConstantInt::get(I32Type, 0);
1465 Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
1466 Value *LowerBound =
1467 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
1468 Builder.CreateCondBr(MoreWork, Header, Exit);
1469
1470 // Change PHI-node in loop header to use outer cond rather than preheader,
1471 // and set IV to the LowerBound.
1472 Instruction *Phi = &Header->front();
1473 auto *PI = cast<PHINode>(Phi);
1474 PI->setIncomingBlock(0, OuterCond);
1475 PI->setIncomingValue(0, LowerBound);
1476
1477 // Then set the pre-header to jump to the OuterCond
1478 Instruction *Term = PreHeader->getTerminator();
1479 auto *Br = cast<BranchInst>(Term);
1480 Br->setSuccessor(0, OuterCond);
1481
1482 // Modify the inner condition:
1483 // * Use the UpperBound returned from the DynamicNext call.
1484 // * jump to the loop outer loop when done with one of the inner loops.
1485 Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
1486 UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
1487 Instruction *Comp = &*Builder.GetInsertPoint();
1488 auto *CI = cast<CmpInst>(Comp);
1489 CI->setOperand(1, UpperBound);
1490 // Redirect the inner exit to branch to outer condition.
1491 Instruction *Branch = &Cond->back();
1492 auto *BI = cast<BranchInst>(Branch);
1493 assert(BI->getSuccessor(1) == Exit)((void)0);
1494 BI->setSuccessor(1, OuterCond);
1495
1496 // Add the barrier if requested.
1497 if (NeedsBarrier) {
1498 Builder.SetInsertPoint(&Exit->back());
1499 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
1500 omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1501 /* CheckCancelFlag */ false);
1502 }
1503
1504 return AfterIP;
1505}
1506
1507/// Make \p Source branch to \p Target.
1508///
1509/// Handles two situations:
1510/// * \p Source already has an unconditional branch.
1511/// * \p Source is a degenerate block (no terminator because the BB is
1512/// the current head of the IR construction).
1513static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
1514 if (Instruction *Term = Source->getTerminator()) {
1515 auto *Br = cast<BranchInst>(Term);
1516 assert(!Br->isConditional() &&((void)0)
1517 "BB's terminator must be an unconditional branch (or degenerate)")((void)0);
1518 BasicBlock *Succ = Br->getSuccessor(0);
1519 Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
1520 Br->setSuccessor(0, Target);
1521 return;
1522 }
1523
1524 auto *NewBr = BranchInst::Create(Target, Source);
1525 NewBr->setDebugLoc(DL);
1526}
1527
1528/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1529/// after this \p OldTarget will be orphaned.
1530static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
1531 BasicBlock *NewTarget, DebugLoc DL) {
1532 for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
1533 redirectTo(Pred, NewTarget, DL);
1534}
1535
1536/// Determine which blocks in \p BBs are reachable from outside and remove the
1537/// ones that are not reachable from the function.
1538static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
1539 SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
1540 auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
1541 for (Use &U : BB->uses()) {
1542 auto *UseInst = dyn_cast<Instruction>(U.getUser());
1543 if (!UseInst)
1544 continue;
1545 if (BBsToErase.count(UseInst->getParent()))
1546 continue;
1547 return true;
1548 }
1549 return false;
1550 };
1551
1552 while (true) {
1553 bool Changed = false;
1554 for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
1555 if (HasRemainingUses(BB)) {
1556 BBsToErase.erase(BB);
1557 Changed = true;
1558 }
1559 }
1560 if (!Changed)
1561 break;
1562 }
1563
1564 SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
1565 DeleteDeadBlocks(BBVec);
1566}
1567
1568CanonicalLoopInfo *
1569OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1570 InsertPointTy ComputeIP) {
1571 assert(Loops.size() >= 1 && "At least one loop required")((void)0);
1572 size_t NumLoops = Loops.size();
1573
1574 // Nothing to do if there is already just one loop.
1575 if (NumLoops == 1)
1
Assuming 'NumLoops' is not equal to 1
2
Taking false branch
1576 return Loops.front();
1577
1578 CanonicalLoopInfo *Outermost = Loops.front();
1579 CanonicalLoopInfo *Innermost = Loops.back();
1580 BasicBlock *OrigPreheader = Outermost->getPreheader();
1581 BasicBlock *OrigAfter = Outermost->getAfter();
1582 Function *F = OrigPreheader->getParent();
1583
1584 // Setup the IRBuilder for inserting the trip count computation.
1585 Builder.SetCurrentDebugLocation(DL);
1586 if (ComputeIP.isSet())
3
Taking true branch
1587 Builder.restoreIP(ComputeIP);
1588 else
1589 Builder.restoreIP(Outermost->getPreheaderIP());
1590
1591 // Derive the collapsed' loop trip count.
1592 // TODO: Find common/largest indvar type.
1593 Value *CollapsedTripCount = nullptr;
4
'CollapsedTripCount' initialized to a null pointer value
1594 for (CanonicalLoopInfo *L : Loops) {
5
Assuming '__begin1' is equal to '__end1'
1595 Value *OrigTripCount = L->getTripCount();
1596 if (!CollapsedTripCount) {
1597 CollapsedTripCount = OrigTripCount;
1598 continue;
1599 }
1600
1601 // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
1602 CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
1603 {}, /*HasNUW=*/true);
1604 }
1605
1606 // Create the collapsed loop control flow.
1607 CanonicalLoopInfo *Result =
1608 createLoopSkeleton(DL, CollapsedTripCount, F,
6
Passing null pointer value via 2nd parameter 'TripCount'
7
Calling 'OpenMPIRBuilder::createLoopSkeleton'
1609 OrigPreheader->getNextNode(), OrigAfter, "collapsed");
1610
1611 // Build the collapsed loop body code.
1612 // Start with deriving the input loop induction variables from the collapsed
1613 // one, using a divmod scheme. To preserve the original loops' order, the
1614 // innermost loop use the least significant bits.
1615 Builder.restoreIP(Result->getBodyIP());
1616
1617 Value *Leftover = Result->getIndVar();
1618 SmallVector<Value *> NewIndVars;
1619 NewIndVars.set_size(NumLoops);
1620 for (int i = NumLoops - 1; i >= 1; --i) {
1621 Value *OrigTripCount = Loops[i]->getTripCount();
1622
1623 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
1624 NewIndVars[i] = NewIndVar;
1625
1626 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
1627 }
1628 // Outermost loop gets all the remaining bits.
1629 NewIndVars[0] = Leftover;
1630
1631 // Construct the loop body control flow.
1632 // We progressively construct the branch structure following in direction of
1633 // the control flow, from the leading in-between code, the loop nest body, the
1634 // trailing in-between code, and rejoining the collapsed loop's latch.
1635 // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
1636 // the ContinueBlock is set, continue with that block. If ContinuePred, use
1637 // its predecessors as sources.
1638 BasicBlock *ContinueBlock = Result->getBody();
1639 BasicBlock *ContinuePred = nullptr;
1640 auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
1641 BasicBlock *NextSrc) {
1642 if (ContinueBlock)
1643 redirectTo(ContinueBlock, Dest, DL);
1644 else
1645 redirectAllPredecessorsTo(ContinuePred, Dest, DL);
1646
1647 ContinueBlock = nullptr;
1648 ContinuePred = NextSrc;
1649 };
1650
1651 // The code before the nested loop of each level.
1652 // Because we are sinking it into the nest, it will be executed more often
1653 // that the original loop. More sophisticated schemes could keep track of what
1654 // the in-between code is and instantiate it only once per thread.
1655 for (size_t i = 0; i < NumLoops - 1; ++i)
1656 ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
1657
1658 // Connect the loop nest body.
1659 ContinueWith(Innermost->getBody(), Innermost->getLatch());
1660
1661 // The code after the nested loop at each level.
1662 for (size_t i = NumLoops - 1; i > 0; --i)
1663 ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
1664
1665 // Connect the finished loop to the collapsed loop latch.
1666 ContinueWith(Result->getLatch(), nullptr);
1667
1668 // Replace the input loops with the new collapsed loop.
1669 redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
1670 redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
1671
1672 // Replace the input loop indvars with the derived ones.
1673 for (size_t i = 0; i < NumLoops; ++i)
1674 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
1675
1676 // Remove unused parts of the input loops.
1677 SmallVector<BasicBlock *, 12> OldControlBBs;
1678 OldControlBBs.reserve(6 * Loops.size());
1679 for (CanonicalLoopInfo *Loop : Loops)
1680 Loop->collectControlBlocks(OldControlBBs);
1681 removeUnusedBlocksFromParent(OldControlBBs);
1682
1683#ifndef NDEBUG1
1684 Result->assertOK();
1685#endif
1686 return Result;
1687}
1688
1689std::vector<CanonicalLoopInfo *>
1690OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1691 ArrayRef<Value *> TileSizes) {
1692 assert(TileSizes.size() == Loops.size() &&((void)0)
1693 "Must pass as many tile sizes as there are loops")((void)0);
1694 int NumLoops = Loops.size();
1695 assert(NumLoops >= 1 && "At least one loop to tile required")((void)0);
1696
1697 CanonicalLoopInfo *OutermostLoop = Loops.front();
1698 CanonicalLoopInfo *InnermostLoop = Loops.back();
1699 Function *F = OutermostLoop->getBody()->getParent();
1700 BasicBlock *InnerEnter = InnermostLoop->getBody();
1701 BasicBlock *InnerLatch = InnermostLoop->getLatch();
1702
1703 // Collect original trip counts and induction variable to be accessible by
1704 // index. Also, the structure of the original loops is not preserved during
1705 // the construction of the tiled loops, so do it before we scavenge the BBs of
1706 // any original CanonicalLoopInfo.
1707 SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
1708 for (CanonicalLoopInfo *L : Loops) {
1709 OrigTripCounts.push_back(L->getTripCount());
1710 OrigIndVars.push_back(L->getIndVar());
1711 }
1712
1713 // Collect the code between loop headers. These may contain SSA definitions
1714 // that are used in the loop nest body. To be usable with in the innermost
1715 // body, these BasicBlocks will be sunk into the loop nest body. That is,
1716 // these instructions may be executed more often than before the tiling.
1717 // TODO: It would be sufficient to only sink them into body of the
1718 // corresponding tile loop.
1719 SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
1720 for (int i = 0; i < NumLoops - 1; ++i) {
1721 CanonicalLoopInfo *Surrounding = Loops[i];
1722 CanonicalLoopInfo *Nested = Loops[i + 1];
1723
1724 BasicBlock *EnterBB = Surrounding->getBody();
1725 BasicBlock *ExitBB = Nested->getHeader();
1726 InbetweenCode.emplace_back(EnterBB, ExitBB);
1727 }
1728
1729 // Compute the trip counts of the floor loops.
1730 Builder.SetCurrentDebugLocation(DL);
1731 Builder.restoreIP(OutermostLoop->getPreheaderIP());
1732 SmallVector<Value *, 4> FloorCount, FloorRems;
1733 for (int i = 0; i < NumLoops; ++i) {
1734 Value *TileSize = TileSizes[i];
1735 Value *OrigTripCount = OrigTripCounts[i];
1736 Type *IVType = OrigTripCount->getType();
1737
1738 Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
1739 Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
1740
1741 // 0 if tripcount divides the tilesize, 1 otherwise.
1742 // 1 means we need an additional iteration for a partial tile.
1743 //
1744 // Unfortunately we cannot just use the roundup-formula
1745 // (tripcount + tilesize - 1)/tilesize
1746 // because the summation might overflow. We do not want introduce undefined
1747 // behavior when the untiled loop nest did not.
1748 Value *FloorTripOverflow =
1749 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
1750
1751 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
1752 FloorTripCount =
1753 Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
1754 "omp_floor" + Twine(i) + ".tripcount", true);
1755
1756 // Remember some values for later use.
1757 FloorCount.push_back(FloorTripCount);
1758 FloorRems.push_back(FloorTripRem);
1759 }
1760
1761 // Generate the new loop nest, from the outermost to the innermost.
1762 std::vector<CanonicalLoopInfo *> Result;
1763 Result.reserve(NumLoops * 2);
1764
1765 // The basic block of the surrounding loop that enters the nest generated
1766 // loop.
1767 BasicBlock *Enter = OutermostLoop->getPreheader();
1768
1769 // The basic block of the surrounding loop where the inner code should
1770 // continue.
1771 BasicBlock *Continue = OutermostLoop->getAfter();
1772
1773 // Where the next loop basic block should be inserted.
1774 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
1775
1776 auto EmbeddNewLoop =
1777 [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
1778 Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
1779 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
1780 DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
1781 redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
1782 redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
1783
1784 // Setup the position where the next embedded loop connects to this loop.
1785 Enter = EmbeddedLoop->getBody();
1786 Continue = EmbeddedLoop->getLatch();
1787 OutroInsertBefore = EmbeddedLoop->getLatch();
1788 return EmbeddedLoop;
1789 };
1790
1791 auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
1792 const Twine &NameBase) {
1793 for (auto P : enumerate(TripCounts)) {
1794 CanonicalLoopInfo *EmbeddedLoop =
1795 EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
1796 Result.push_back(EmbeddedLoop);
1797 }
1798 };
1799
1800 EmbeddNewLoops(FloorCount, "floor");
1801
1802 // Within the innermost floor loop, emit the code that computes the tile
1803 // sizes.
1804 Builder.SetInsertPoint(Enter->getTerminator());
1805 SmallVector<Value *, 4> TileCounts;
1806 for (int i = 0; i < NumLoops; ++i) {
1807 CanonicalLoopInfo *FloorLoop = Result[i];
1808 Value *TileSize = TileSizes[i];
1809
1810 Value *FloorIsEpilogue =
1811 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
1812 Value *TileTripCount =
1813 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
1814
1815 TileCounts.push_back(TileTripCount);
1816 }
1817
1818 // Create the tile loops.
1819 EmbeddNewLoops(TileCounts, "tile");
1820
1821 // Insert the inbetween code into the body.
1822 BasicBlock *BodyEnter = Enter;
1823 BasicBlock *BodyEntered = nullptr;
1824 for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
1825 BasicBlock *EnterBB = P.first;
1826 BasicBlock *ExitBB = P.second;
1827
1828 if (BodyEnter)
1829 redirectTo(BodyEnter, EnterBB, DL);
1830 else
1831 redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
1832
1833 BodyEnter = nullptr;
1834 BodyEntered = ExitBB;
1835 }
1836
1837 // Append the original loop nest body into the generated loop nest body.
1838 if (BodyEnter)
1839 redirectTo(BodyEnter, InnerEnter, DL);
1840 else
1841 redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
1842 redirectAllPredecessorsTo(InnerLatch, Continue, DL);
1843
1844 // Replace the original induction variable with an induction variable computed
1845 // from the tile and floor induction variables.
1846 Builder.restoreIP(Result.back()->getBodyIP());
1847 for (int i = 0; i < NumLoops; ++i) {
1848 CanonicalLoopInfo *FloorLoop = Result[i];
1849 CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
1850 Value *OrigIndVar = OrigIndVars[i];
1851 Value *Size = TileSizes[i];
1852
1853 Value *Scale =
1854 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
1855 Value *Shift =
1856 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
1857 OrigIndVar->replaceAllUsesWith(Shift);
1858 }
1859
1860 // Remove unused parts of the original loops.
1861 SmallVector<BasicBlock *, 12> OldControlBBs;
1862 OldControlBBs.reserve(6 * Loops.size());
1863 for (CanonicalLoopInfo *Loop : Loops)
1864 Loop->collectControlBlocks(OldControlBBs);
1865 removeUnusedBlocksFromParent(OldControlBBs);
1866
1867#ifndef NDEBUG1
1868 for (CanonicalLoopInfo *GenL : Result)
1869 GenL->assertOK();
1870#endif
1871 return Result;
1872}
1873
1874OpenMPIRBuilder::InsertPointTy
1875OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
1876 llvm::Value *BufSize, llvm::Value *CpyBuf,
1877 llvm::Value *CpyFn, llvm::Value *DidIt) {
1878 if (!updateToLocation(Loc))
1879 return Loc.IP;
1880
1881 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1882 Value *Ident = getOrCreateIdent(SrcLocStr);
1883 Value *ThreadId = getOrCreateThreadID(Ident);
1884
1885 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
1886
1887 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
1888
1889 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
1890 Builder.CreateCall(Fn, Args);
1891
1892 return Builder.saveIP();
1893}
1894
1895OpenMPIRBuilder::InsertPointTy
1896OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
1897 BodyGenCallbackTy BodyGenCB,
1898 FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
1899
1900 if (!updateToLocation(Loc))
1901 return Loc.IP;
1902
1903 // If needed (i.e. not null), initialize `DidIt` with 0
1904 if (DidIt) {
1905 Builder.CreateStore(Builder.getInt32(0), DidIt);
1906 }
1907
1908 Directive OMPD = Directive::OMPD_single;
1909 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1910 Value *Ident = getOrCreateIdent(SrcLocStr);
1911 Value *ThreadId = getOrCreateThreadID(Ident);
1912 Value *Args[] = {Ident, ThreadId};
1913
1914 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
1915 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1916
1917 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
1918 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1919
1920 // generates the following:
1921 // if (__kmpc_single()) {
1922 // .... single region ...
1923 // __kmpc_end_single
1924 // }
1925
1926 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1927 /*Conditional*/ true, /*hasFinalize*/ true);
1928}
1929
1930OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
1931 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
1932 FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
1933
1934 if (!updateToLocation(Loc))
1935 return Loc.IP;
1936
1937 Directive OMPD = Directive::OMPD_critical;
1938 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1939 Value *Ident = getOrCreateIdent(SrcLocStr);
1940 Value *ThreadId = getOrCreateThreadID(Ident);
1941 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
1942 Value *Args[] = {Ident, ThreadId, LockVar};
1943
1944 SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
1945 Function *RTFn = nullptr;
1946 if (HintInst) {
1947 // Add Hint to entry Args and create call
1948 EnterArgs.push_back(HintInst);
1949 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
1950 } else {
1951 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
1952 }
1953 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
1954
1955 Function *ExitRTLFn =
1956 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
1957 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1958
1959 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1960 /*Conditional*/ false, /*hasFinalize*/ true);
1961}
1962
1963OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
1964 Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
1965 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
1966 bool HasFinalize, bool IsCancellable) {
1967
1968 if (HasFinalize)
1969 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
1970
1971 // Create inlined region's entry and body blocks, in preparation
1972 // for conditional creation
1973 BasicBlock *EntryBB = Builder.GetInsertBlock();
1974 Instruction *SplitPos = EntryBB->getTerminator();
1975 if (!isa_and_nonnull<BranchInst>(SplitPos))
1976 SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
1977 BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
1978 BasicBlock *FiniBB =
1979 EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
1980
1981 Builder.SetInsertPoint(EntryBB->getTerminator());
1982 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
1983
1984 // generate body
1985 BodyGenCB(/* AllocaIP */ InsertPointTy(),
1986 /* CodeGenIP */ Builder.saveIP(), *FiniBB);
1987
1988 // If we didn't emit a branch to FiniBB during body generation, it means
1989 // FiniBB is unreachable (e.g. while(1);). stop generating all the
1990 // unreachable blocks, and remove anything we are not going to use.
1991 auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
1992 if (SkipEmittingRegion) {
1993 FiniBB->eraseFromParent();
1994 ExitCall->eraseFromParent();
1995 // Discard finalization if we have it.
1996 if (HasFinalize) {
1997 assert(!FinalizationStack.empty() &&((void)0)
1998 "Unexpected finalization stack state!")((void)0);
1999 FinalizationStack.pop_back();
2000 }
2001 } else {
2002 // emit exit call and do any needed finalization.
2003 auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
2004 assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&((void)0)
2005 FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&((void)0)
2006 "Unexpected control flow graph state!!")((void)0);
2007 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
2008 assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&((void)0)
2009 "Unexpected Control Flow State!")((void)0);
2010 MergeBlockIntoPredecessor(FiniBB);
2011 }
2012
2013 // If we are skipping the region of a non conditional, remove the exit
2014 // block, and clear the builder's insertion point.
2015 assert(SplitPos->getParent() == ExitBB &&((void)0)
2016 "Unexpected Insertion point location!")((void)0);
2017 if (!Conditional && SkipEmittingRegion) {
2018 ExitBB->eraseFromParent();
2019 Builder.ClearInsertionPoint();
2020 } else {
2021 auto merged = MergeBlockIntoPredecessor(ExitBB);
2022 BasicBlock *ExitPredBB = SplitPos->getParent();
2023 auto InsertBB = merged ? ExitPredBB : ExitBB;
2024 if (!isa_and_nonnull<BranchInst>(SplitPos))
2025 SplitPos->eraseFromParent();
2026 Builder.SetInsertPoint(InsertBB);
2027 }
2028
2029 return Builder.saveIP();
2030}
2031
2032OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
2033 Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
2034 // if nothing to do, Return current insertion point.
2035 if (!Conditional || !EntryCall)
2036 return Builder.saveIP();
2037
2038 BasicBlock *EntryBB = Builder.GetInsertBlock();
2039 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
2040 auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
2041 auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
2042
2043 // Emit thenBB and set the Builder's insertion point there for
2044 // body generation next. Place the block after the current block.
2045 Function *CurFn = EntryBB->getParent();
2046 CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
2047
2048 // Move Entry branch to end of ThenBB, and replace with conditional
2049 // branch (If-stmt)
2050 Instruction *EntryBBTI = EntryBB->getTerminator();
2051 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
2052 EntryBBTI->removeFromParent();
2053 Builder.SetInsertPoint(UI);
2054 Builder.Insert(EntryBBTI);
2055 UI->eraseFromParent();
2056 Builder.SetInsertPoint(ThenBB->getTerminator());
2057
2058 // return an insertion point to ExitBB.
2059 return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
2060}
2061
2062OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
2063 omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
2064 bool HasFinalize) {
2065
2066 Builder.restoreIP(FinIP);
2067
2068 // If there is finalization to do, emit it before the exit call
2069 if (HasFinalize) {
2070 assert(!FinalizationStack.empty() &&((void)0)
2071 "Unexpected finalization stack state!")((void)0);
2072
2073 FinalizationInfo Fi = FinalizationStack.pop_back_val();
2074 assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!")((void)0);
2075
2076 Fi.FiniCB(FinIP);
2077
2078 BasicBlock *FiniBB = FinIP.getBlock();
2079 Instruction *FiniBBTI = FiniBB->getTerminator();
2080
2081 // set Builder IP for call creation
2082 Builder.SetInsertPoint(FiniBBTI);
2083 }
2084
2085 if (!ExitCall)
2086 return Builder.saveIP();
2087
2088 // place the Exitcall as last instruction before Finalization block terminator
2089 ExitCall->removeFromParent();
2090 Builder.Insert(ExitCall);
2091
2092 return IRBuilder<>::InsertPoint(ExitCall->getParent(),
2093 ExitCall->getIterator());
2094}
2095
2096OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
2097 InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
2098 llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
2099 if (!IP.isSet())
2100 return IP;
2101
2102 IRBuilder<>::InsertPointGuard IPG(Builder);
2103
2104 // creates the following CFG structure
2105 // OMP_Entry : (MasterAddr != PrivateAddr)?
2106 // F T
2107 // | \
2108 // | copin.not.master
2109 // | /
2110 // v /
2111 // copyin.not.master.end
2112 // |
2113 // v
2114 // OMP.Entry.Next
2115
2116 BasicBlock *OMP_Entry = IP.getBlock();
2117 Function *CurFn = OMP_Entry->getParent();
2118 BasicBlock *CopyBegin =
2119 BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
2120 BasicBlock *CopyEnd = nullptr;
2121
2122 // If entry block is terminated, split to preserve the branch to following
2123 // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
2124 if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
2125 CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
2126 "copyin.not.master.end");
2127 OMP_Entry->getTerminator()->eraseFromParent();
2128 } else {
2129 CopyEnd =
2130 BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
2131 }
2132
2133 Builder.SetInsertPoint(OMP_Entry);
2134 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
2135 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
2136 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
2137 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
2138
2139 Builder.SetInsertPoint(CopyBegin);
2140 if (BranchtoEnd)
2141 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
2142
2143 return Builder.saveIP();
2144}
2145
2146CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
2147 Value *Size, Value *Allocator,
2148 std::string Name) {
2149 IRBuilder<>::InsertPointGuard IPG(Builder);
2150 Builder.restoreIP(Loc.IP);
2151
2152 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2153 Value *Ident = getOrCreateIdent(SrcLocStr);
2154 Value *ThreadId = getOrCreateThreadID(Ident);
2155 Value *Args[] = {ThreadId, Size, Allocator};
2156
2157 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
2158
2159 return Builder.CreateCall(Fn, Args, Name);
2160}
2161
2162CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
2163 Value *Addr, Value *Allocator,
2164 std::string Name) {
2165 IRBuilder<>::InsertPointGuard IPG(Builder);
2166 Builder.restoreIP(Loc.IP);
2167
2168 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2169 Value *Ident = getOrCreateIdent(SrcLocStr);
2170 Value *ThreadId = getOrCreateThreadID(Ident);
2171 Value *Args[] = {ThreadId, Addr, Allocator};
2172 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
2173 return Builder.CreateCall(Fn, Args, Name);
2174}
2175
2176CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
2177 const LocationDescription &Loc, llvm::Value *Pointer,
2178 llvm::ConstantInt *Size, const llvm::Twine &Name) {
2179 IRBuilder<>::InsertPointGuard IPG(Builder);
2180 Builder.restoreIP(Loc.IP);
2181
2182 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2183 Value *Ident = getOrCreateIdent(SrcLocStr);
2184 Value *ThreadId = getOrCreateThreadID(Ident);
2185 Constant *ThreadPrivateCache =
2186 getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
2187 llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
2188
2189 Function *Fn =
2190 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
2191
2192 return Builder.CreateCall(Fn, Args);
2193}
2194
2195OpenMPIRBuilder::InsertPointTy
2196OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
2197 if (!updateToLocation(Loc))
2198 return Loc.IP;
2199
2200 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2201 Value *Ident = getOrCreateIdent(SrcLocStr);
2202 ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
2203 ConstantInt *UseGenericStateMachine =
2204 ConstantInt::getBool(Int32->getContext(), !IsSPMD);
2205 ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2206
2207 Function *Fn = getOrCreateRuntimeFunctionPtr(
2208 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
2209
2210 CallInst *ThreadKind =
2211 Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
2212
2213 Value *ExecUserCode = Builder.CreateICmpEQ(
2214 ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");
2215
2216 // ThreadKind = __kmpc_target_init(...)
2217 // if (ThreadKind == -1)
2218 // user_code
2219 // else
2220 // return;
2221
2222 auto *UI = Builder.CreateUnreachable();
2223 BasicBlock *CheckBB = UI->getParent();
2224 BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
2225
2226 BasicBlock *WorkerExitBB = BasicBlock::Create(
2227 CheckBB->getContext(), "worker.exit", CheckBB->getParent());
2228 Builder.SetInsertPoint(WorkerExitBB);
2229 Builder.CreateRetVoid();
2230
2231 auto *CheckBBTI = CheckBB->getTerminator();
2232 Builder.SetInsertPoint(CheckBBTI);
2233 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
2234
2235 CheckBBTI->eraseFromParent();
2236 UI->eraseFromParent();
2237
2238 // Continue in the "user_code" block, see diagram above and in
2239 // openmp/libomptarget/deviceRTLs/common/include/target.h .
2240 return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
2241}
2242
2243void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
2244 bool IsSPMD, bool RequiresFullRuntime) {
2245 if (!updateToLocation(Loc))
2246 return;
2247
2248 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2249 Value *Ident = getOrCreateIdent(SrcLocStr);
2250 ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
2251 ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2252
2253 Function *Fn = getOrCreateRuntimeFunctionPtr(
2254 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
2255
2256 Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
2257}
2258
2259std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
2260 StringRef FirstSeparator,
2261 StringRef Separator) {
2262 SmallString<128> Buffer;
2263 llvm::raw_svector_ostream OS(Buffer);
2264 StringRef Sep = FirstSeparator;
2265 for (StringRef Part : Parts) {
2266 OS << Sep << Part;
2267 Sep = Separator;
2268 }
2269 return OS.str().str();
2270}
2271
2272Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
2273 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2274 // TODO: Replace the twine arg with stringref to get rid of the conversion
2275 // logic. However This is taken from current implementation in clang as is.
2276 // Since this method is used in many places exclusively for OMP internal use
2277 // we will keep it as is for temporarily until we move all users to the
2278 // builder and then, if possible, fix it everywhere in one go.
2279 SmallString<256> Buffer;
2280 llvm::raw_svector_ostream Out(Buffer);
2281 Out << Name;
2282 StringRef RuntimeName = Out.str();
2283 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2284 if (Elem.second) {
2285 assert(Elem.second->getType()->getPointerElementType() == Ty &&((void)0)
2286 "OMP internal variable has different type than requested")((void)0);
2287 } else {
2288 // TODO: investigate the appropriate linkage type used for the global
2289 // variable for possibly changing that to internal or private, or maybe
2290 // create different versions of the function for different OMP internal
2291 // variables.
2292 Elem.second = new llvm::GlobalVariable(
2293 M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
2294 llvm::Constant::getNullValue(Ty), Elem.first(),
2295 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
2296 AddressSpace);
2297 }
2298
2299 return Elem.second;
2300}
2301
2302Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
2303 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2304 std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
2305 return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
2306}
2307
2308GlobalVariable *
2309OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
2310 std::string VarName) {
2311 llvm::Constant *MaptypesArrayInit =
2312 llvm::ConstantDataArray::get(M.getContext(), Mappings);
2313 auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
2314 M, MaptypesArrayInit->getType(),
2315 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
2316 VarName);
2317 MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2318 return MaptypesArrayGlobal;
2319}
2320
2321void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
2322 InsertPointTy AllocaIP,
2323 unsigned NumOperands,
2324 struct MapperAllocas &MapperAllocas) {
2325 if (!updateToLocation(Loc))
2326 return;
2327
2328 auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2329 auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2330 Builder.restoreIP(AllocaIP);
2331 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
2332 AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
2333 AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
2334 Builder.restoreIP(Loc.IP);
2335 MapperAllocas.ArgsBase = ArgsBase;
2336 MapperAllocas.Args = Args;
2337 MapperAllocas.ArgSizes = ArgSizes;
2338}
2339
2340void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
2341 Function *MapperFunc, Value *SrcLocInfo,
2342 Value *MaptypesArg, Value *MapnamesArg,
2343 struct MapperAllocas &MapperAllocas,
2344 int64_t DeviceID, unsigned NumOperands) {
2345 if (!updateToLocation(Loc))
2346 return;
2347
2348 auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2349 auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2350 Value *ArgsBaseGEP =
2351 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
2352 {Builder.getInt32(0), Builder.getInt32(0)});
2353 Value *ArgsGEP =
2354 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
2355 {Builder.getInt32(0), Builder.getInt32(0)});
2356 Value *ArgSizesGEP =
2357 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
2358 {Builder.getInt32(0), Builder.getInt32(0)});
2359 Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
2360 Builder.CreateCall(MapperFunc,
2361 {SrcLocInfo, Builder.getInt64(DeviceID),
2362 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
2363 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
2364}
2365
2366bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
2367 const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
2368 assert(!(AO == AtomicOrdering::NotAtomic ||((void)0)
2369 AO == llvm::AtomicOrdering::Unordered) &&((void)0)
2370 "Unexpected Atomic Ordering.")((void)0);
2371
2372 bool Flush = false;
2373 llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic;
2374
2375 switch (AK) {
2376 case Read:
2377 if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
2378 AO == AtomicOrdering::SequentiallyConsistent) {
2379 FlushAO = AtomicOrdering::Acquire;
2380 Flush = true;
2381 }
2382 break;
2383 case Write:
2384 case Update:
2385 if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
2386 AO == AtomicOrdering::SequentiallyConsistent) {
2387 FlushAO = AtomicOrdering::Release;
2388 Flush = true;
2389 }
2390 break;
2391 case Capture:
2392 switch (AO) {
2393 case AtomicOrdering::Acquire:
2394 FlushAO = AtomicOrdering::Acquire;
2395 Flush = true;
2396 break;
2397 case AtomicOrdering::Release:
2398 FlushAO = AtomicOrdering::Release;
2399 Flush = true;
2400 break;
2401 case AtomicOrdering::AcquireRelease:
2402 case AtomicOrdering::SequentiallyConsistent:
2403 FlushAO = AtomicOrdering::AcquireRelease;
2404 Flush = true;
2405 break;
2406 default:
2407 // do nothing - leave silently.
2408 break;
2409 }
2410 }
2411
2412 if (Flush) {
2413 // Currently Flush RT call still doesn't take memory_ordering, so for when
2414 // that happens, this tries to do the resolution of which atomic ordering
2415 // to use with but issue the flush call
2416 // TODO: pass `FlushAO` after memory ordering support is added
2417 (void)FlushAO;
2418 emitFlush(Loc);
2419 }
2420
2421 // for AO == AtomicOrdering::Monotonic and all other case combinations
2422 // do nothing
2423 return Flush;
2424}
2425
2426OpenMPIRBuilder::InsertPointTy
2427OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
2428 AtomicOpValue &X, AtomicOpValue &V,
2429 AtomicOrdering AO) {
2430 if (!updateToLocation(Loc))
2431 return Loc.IP;
2432
2433 Type *XTy = X.Var->getType();
2434 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory")((void)0);
2435 Type *XElemTy = XTy->getPointerElementType();
2436 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||((void)0)
2437 XElemTy->isPointerTy()) &&((void)0)
2438 "OMP atomic read expected a scalar type")((void)0);
2439
2440 Value *XRead = nullptr;
2441
2442 if (XElemTy->isIntegerTy()) {
2443 LoadInst *XLD =
2444 Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
2445 XLD->setAtomic(AO);
2446 XRead = cast<Value>(XLD);
2447 } else {
2448 // We need to bitcast and perform atomic op as integer
2449 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
2450 IntegerType *IntCastTy =
2451 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2452 Value *XBCast = Builder.CreateBitCast(
2453 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
2454 LoadInst *XLoad =
2455 Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
2456 XLoad->setAtomic(AO);
2457 if (XElemTy->isFloatingPointTy()) {
2458 XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
2459 } else {
2460 XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
2461 }
2462 }
2463 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
2464 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
2465 return Builder.saveIP();
2466}
2467
2468OpenMPIRBuilder::InsertPointTy
2469OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
2470 AtomicOpValue &X, Value *Expr,
2471 AtomicOrdering AO) {
2472 if (!updateToLocation(Loc))
2473 return Loc.IP;
2474
2475 Type *XTy = X.Var->getType();
2476 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory")((void)0);
2477 Type *XElemTy = XTy->getPointerElementType();
2478 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||((void)0)
2479 XElemTy->isPointerTy()) &&((void)0)
2480 "OMP atomic write expected a scalar type")((void)0);
2481
2482 if (XElemTy->isIntegerTy()) {
2483 StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
2484 XSt->setAtomic(AO);
2485 } else {
2486 // We need to bitcast and perform atomic op as integers
2487 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
2488 IntegerType *IntCastTy =
2489 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2490 Value *XBCast = Builder.CreateBitCast(
2491 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
2492 Value *ExprCast =
2493 Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
2494 StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
2495 XSt->setAtomic(AO);
2496 }
2497
2498 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
2499 return Builder.saveIP();
2500}
2501
2502OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
2503 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
2504 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2505 AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
2506 if (!updateToLocation(Loc))
2507 return Loc.IP;
2508
2509 LLVM_DEBUG({do { } while (false)
2510 Type *XTy = X.Var->getType();do { } while (false)
2511 assert(XTy->isPointerTy() &&do { } while (false)
2512 "OMP Atomic expects a pointer to target memory");do { } while (false)
2513 Type *XElemTy = XTy->getPointerElementType();do { } while (false)
2514 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||do { } while (false)
2515 XElemTy->isPointerTy()) &&do { } while (false)
2516 "OMP atomic update expected a scalar type");do { } while (false)
2517 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&do { } while (false)
2518 (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&do { } while (false)
2519 "OpenMP atomic does not support LT or GT operations");do { } while (false)
2520 })do { } while (false);
2521
2522 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
2523 IsXLHSInRHSPart);
2524 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
2525 return Builder.saveIP();
2526}
2527
2528Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2529 AtomicRMWInst::BinOp RMWOp) {
2530 switch (RMWOp) {
2531 case AtomicRMWInst::Add:
2532 return Builder.CreateAdd(Src1, Src2);
2533 case AtomicRMWInst::Sub:
2534 return Builder.CreateSub(Src1, Src2);
2535 case AtomicRMWInst::And:
2536 return Builder.CreateAnd(Src1, Src2);
2537 case AtomicRMWInst::Nand:
2538 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
2539 case AtomicRMWInst::Or:
2540 return Builder.CreateOr(Src1, Src2);
2541 case AtomicRMWInst::Xor:
2542 return Builder.CreateXor(Src1, Src2);
2543 case AtomicRMWInst::Xchg:
2544 case AtomicRMWInst::FAdd:
2545 case AtomicRMWInst::FSub:
2546 case AtomicRMWInst::BAD_BINOP:
2547 case AtomicRMWInst::Max:
2548 case AtomicRMWInst::Min:
2549 case AtomicRMWInst::UMax:
2550 case AtomicRMWInst::UMin:
2551 llvm_unreachable("Unsupported atomic update operation")__builtin_unreachable();
2552 }
2553 llvm_unreachable("Unsupported atomic update operation")__builtin_unreachable();
2554}
2555
2556std::pair<Value *, Value *>
2557OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
2558 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2559 AtomicUpdateCallbackTy &UpdateOp,
2560 bool VolatileX, bool IsXLHSInRHSPart) {
2561 Type *XElemTy = X->getType()->getPointerElementType();
2562
2563 bool DoCmpExch =
2564 ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
2565 (RMWOp == AtomicRMWInst::FSub) ||
2566 (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);
2567
2568 std::pair<Value *, Value *> Res;
2569 if (XElemTy->isIntegerTy() && !DoCmpExch) {
2570 Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
2571 // not needed except in case of postfix captures. Generate anyway for
2572 // consistency with the else part. Will be removed with any DCE pass.
2573 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
2574 } else {
2575 unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
2576 IntegerType *IntCastTy =
2577 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2578 Value *XBCast =
2579 Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
2580 LoadInst *OldVal =
2581 Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
2582 OldVal->setAtomic(AO);
2583 // CurBB
2584 // | /---\
2585 // ContBB |
2586 // | \---/
2587 // ExitBB
2588 BasicBlock *CurBB = Builder.GetInsertBlock();
2589 Instruction *CurBBTI = CurBB->getTerminator();
2590 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
2591 BasicBlock *ExitBB =
2592 CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
2593 BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
2594 X->getName() + ".atomic.cont");
2595 ContBB->getTerminator()->eraseFromParent();
2596 Builder.SetInsertPoint(ContBB);
2597 llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
2598 PHI->addIncoming(OldVal, CurBB);
2599 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
2600 NewAtomicAddr->setName(X->getName() + "x.new.val");
2601 NewAtomicAddr->moveBefore(AllocIP);
2602 IntegerType *NewAtomicCastTy =
2603 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2604 bool IsIntTy = XElemTy->isIntegerTy();
2605 Value *NewAtomicIntAddr =
2606 (IsIntTy)
2607 ? NewAtomicAddr
2608 : Builder.CreateBitCast(NewAtomicAddr,
2609 NewAtomicCastTy->getPointerTo(Addrspace));
2610 Value *OldExprVal = PHI;
2611 if (!IsIntTy) {
2612 if (XElemTy->isFloatingPointTy()) {
2613 OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
2614 X->getName() + ".atomic.fltCast");
2615 } else {
2616 OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
2617 X->getName() + ".atomic.ptrCast");
2618 }
2619 }
2620
2621 Value *Upd = UpdateOp(OldExprVal, Builder);
2622 Builder.CreateStore(Upd, NewAtomicAddr);
2623 LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
2624 Value *XAddr =
2625 (IsIntTy)
2626 ? X
2627 : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
2628 AtomicOrdering Failure =
2629 llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
2630 AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
2631 XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
2632 Result->setVolatile(VolatileX);
2633 Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
2634 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
2635 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
2636 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
2637
2638 Res.first = OldExprVal;
2639 Res.second = Upd;
2640
2641 // set Insertion point in exit block
2642 if (UnreachableInst *ExitTI =
2643 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
2644 CurBBTI->eraseFromParent();
2645 Builder.SetInsertPoint(ExitBB);
2646 } else {
2647 Builder.SetInsertPoint(ExitTI);
2648 }
2649 }
2650
2651 return Res;
2652}
2653
2654OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
2655 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
2656 AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
2657 AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
2658 bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
2659 if (!updateToLocation(Loc))
2660 return Loc.IP;
2661
2662 LLVM_DEBUG({do { } while (false)
2663 Type *XTy = X.Var->getType();do { } while (false)
2664 assert(XTy->isPointerTy() &&do { } while (false)
2665 "OMP Atomic expects a pointer to target memory");do { } while (false)
2666 Type *XElemTy = XTy->getPointerElementType();do { } while (false)
2667 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||do { } while (false)
2668 XElemTy->isPointerTy()) &&do { } while (false)
2669 "OMP atomic capture expected a scalar type");do { } while (false)
2670 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&do { } while (false)
2671 "OpenMP atomic does not support LT or GT operations");do { } while (false)
2672 })do { } while (false);
2673
2674 // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
2675 // 'x' is simply atomically rewritten with 'expr'.
2676 AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
2677 std::pair<Value *, Value *> Result =
2678 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
2679 X.IsVolatile, IsXLHSInRHSPart);
2680
2681 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
2682 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
2683
2684 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
2685 return Builder.saveIP();
2686}
2687
2688GlobalVariable *
2689OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
2690 std::string VarName) {
2691 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
2692 llvm::ArrayType::get(
2693 llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
2694 Names);
2695 auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
2696 M, MapNamesArrayInit->getType(),
2697 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
2698 VarName);
2699 return MapNamesArrayGlobal;
2700}
2701
2702// Create all simple and struct types exposed by the runtime and remember
2703// the llvm::PointerTypes of them for easy access later.
2704void OpenMPIRBuilder::initializeTypes(Module &M) {
2705 LLVMContext &Ctx = M.getContext();
2706 StructType *T;
2707#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
2708#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
2709 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
2710 VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
2711#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
2712 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
2713 VarName##Ptr = PointerType::getUnqual(VarName);
2714#define OMP_STRUCT_TYPE(VarName, StructName, ...) \
2715 T = StructType::getTypeByName(Ctx, StructName); \
2716 if (!T) \
2717 T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
2718 VarName = T; \
2719 VarName##Ptr = PointerType::getUnqual(T);
2720#include "llvm/Frontend/OpenMP/OMPKinds.def"
2721}
2722
2723void OpenMPIRBuilder::OutlineInfo::collectBlocks(
2724 SmallPtrSetImpl<BasicBlock *> &BlockSet,
2725 SmallVectorImpl<BasicBlock *> &BlockVector) {
2726 SmallVector<BasicBlock *, 32> Worklist;
2727 BlockSet.insert(EntryBB);
2728 BlockSet.insert(ExitBB);
2729
2730 Worklist.push_back(EntryBB);
2731 while (!Worklist.empty()) {
2732 BasicBlock *BB = Worklist.pop_back_val();
2733 BlockVector.push_back(BB);
2734 for (BasicBlock *SuccBB : successors(BB))
2735 if (BlockSet.insert(SuccBB).second)
2736 Worklist.push_back(SuccBB);
2737 }
2738}
2739
2740void CanonicalLoopInfo::collectControlBlocks(
2741 SmallVectorImpl<BasicBlock *> &BBs) {
2742 // We only count those BBs as control block for which we do not need to
2743 // reverse the CFG, i.e. not the loop body which can contain arbitrary control
2744 // flow. For consistency, this also means we do not add the Body block, which
2745 // is just the entry to the body code.
2746 BBs.reserve(BBs.size() + 6);
2747 BBs.append({Preheader, Header, Cond, Latch, Exit, After});
2748}
2749
2750void CanonicalLoopInfo::assertOK() const {
2751#ifndef NDEBUG1
2752 if (!IsValid)
2753 return;
2754
2755 // Verify standard control-flow we use for OpenMP loops.
2756 assert(Preheader)((void)0);
2757 assert(isa<BranchInst>(Preheader->getTerminator()) &&((void)0)
2758 "Preheader must terminate with unconditional branch")((void)0);
2759 assert(Preheader->getSingleSuccessor() == Header &&((void)0)
2760 "Preheader must jump to header")((void)0);
2761
2762 assert(Header)((void)0);
2763 assert(isa<BranchInst>(Header->getTerminator()) &&((void)0)
2764 "Header must terminate with unconditional branch")((void)0);
2765 assert(Header->getSingleSuccessor() == Cond &&((void)0)
2766 "Header must jump to exiting block")((void)0);
2767
2768 assert(Cond)((void)0);
2769 assert(Cond->getSinglePredecessor() == Header &&((void)0)
2770 "Exiting block only reachable from header")((void)0);
2771
2772 assert(isa<BranchInst>(Cond->getTerminator()) &&((void)0)
2773 "Exiting block must terminate with conditional branch")((void)0);
2774 assert(size(successors(Cond)) == 2 &&((void)0)
2775 "Exiting block must have two successors")((void)0);
2776 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&((void)0)
2777 "Exiting block's first successor jump to the body")((void)0);
2778 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&((void)0)
2779 "Exiting block's second successor must exit the loop")((void)0);
2780
2781 assert(Body)((void)0);
2782 assert(Body->getSinglePredecessor() == Cond &&((void)0)
2783 "Body only reachable from exiting block")((void)0);
2784 assert(!isa<PHINode>(Body->front()))((void)0);
2785
2786 assert(Latch)((void)0);
2787 assert(isa<BranchInst>(Latch->getTerminator()) &&((void)0)
2788 "Latch must terminate with unconditional branch")((void)0);
2789 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header")((void)0);
2790 // TODO: To support simple redirecting of the end of the body code that has
2791 // multiple; introduce another auxiliary basic block like preheader and after.
2792 assert(Latch->getSinglePredecessor() != nullptr)((void)0);
2793 assert(!isa<PHINode>(Latch->front()))((void)0);
2794
2795 assert(Exit)((void)0);
2796 assert(isa<BranchInst>(Exit->getTerminator()) &&((void)0)
2797 "Exit block must terminate with unconditional branch")((void)0);
2798 assert(Exit->getSingleSuccessor() == After &&((void)0)
2799 "Exit block must jump to after block")((void)0);
2800
2801 assert(After)((void)0);
2802 assert(After->getSinglePredecessor() == Exit &&((void)0)
2803 "After block only reachable from exit block")((void)0);
2804 assert(After->empty() || !isa<PHINode>(After->front()))((void)0);
2805
2806 Instruction *IndVar = getIndVar();
2807 assert(IndVar && "Canonical induction variable not found?")((void)0);
2808 assert(isa<IntegerType>(IndVar->getType()) &&((void)0)
2809 "Induction variable must be an integer")((void)0);
2810 assert(cast<PHINode>(IndVar)->getParent() == Header &&((void)0)
2811 "Induction variable must be a PHI in the loop header")((void)0);
2812 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader)((void)0);
2813 assert(((void)0)
2814 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero())((void)0);
2815 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch)((void)0);
2816
2817 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
2818 assert(cast<Instruction>(NextIndVar)->getParent() == Latch)((void)0);
2819 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add)((void)0);
2820 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar)((void)0);
2821 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))((void)0)
2822 ->isOne())((void)0);
2823
2824 Value *TripCount = getTripCount();
2825 assert(TripCount && "Loop trip count not found?")((void)0);
2826 assert(IndVar->getType() == TripCount->getType() &&((void)0)
2827 "Trip count and induction variable must have the same type")((void)0);
2828
2829 auto *CmpI = cast<CmpInst>(&Cond->front());
2830 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&((void)0)
2831 "Exit condition must be a signed less-than comparison")((void)0);
2832 assert(CmpI->getOperand(0) == IndVar &&((void)0)
2833 "Exit condition must compare the induction variable")((void)0);
2834 assert(CmpI->getOperand(1) == TripCount &&((void)0)
2835 "Exit condition must compare with the trip count")((void)0);
2836#endif
2837}