//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
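
  // Illustrative sketch (not emitted verbatim): for an untied task with two
  // scheduling points, Enter() starts the outlined body with a dispatch on
  // the stored part id,
  //
  //   switch (*part_id) {
  //   default: br .untied.done.    // task finished
  //   case 0:  br .untied.jmp.0    // initial entry
  //   case 1:  br .untied.jmp.1    // resume after the first scheduling point
  //   }
  //
  // and each emitUntiedSwitch() stores the next case number into *part_id,
  // re-enqueues the task (UntiedCodeGen emits the __kmpc_omp_task call), and
  // registers a new case that resumes right after the scheduling point.
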
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
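
// For illustration: an implicit barrier emitted at the end of a 'for'
// directive is typically described by an ident_t whose flags field combines
// these bits as OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR, which lets the
// runtime tell the different barrier kinds apart.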

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
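
// These indices are used when addressing individual fields of an ident_t
// instance, e.g. (sketch, assuming LocValue points to an ident_t and IdentTy
// is its IR struct type):
//   CGF.Builder.CreateStructGEP(IdentTy, LocValue, IdentField_PSource);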

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
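
// For illustration: '#pragma omp for schedule(nonmonotonic: dynamic, 4)' is
// encoded for the runtime as
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// with the chunk size (4) passed as a separate argument to the dispatch init
// call.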

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
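
// Worked example (not part of the source): for
//   #pragma omp declare reduction(...) initializer(omp_priv = omp_orig)
// InitOp is the call-shaped initializer expression whose two arguments
// reference omp_priv and omp_orig; the branch above rebinds omp_priv to
// Private and omp_orig to Original and then emits the expression. Without an
// initializer clause, the else branch default-initializes Private from a null
// constant of the reduction type instead.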

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
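
// As a sketch, the control flow generated by the function above is:
//
//   <entry>:             %isempty = icmp eq DestBegin, DestEnd
//                        br %isempty, omp.arrayinit.done, omp.arrayinit.body
//   omp.arrayinit.body:  phi for the current element (and the source element
//                        when a UDR is used); emit init for one element;
//                        advance; br %done, omp.arrayinit.done, body
//   omp.arrayinit.done:  continue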

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
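
// Note (worked example, not part of the source): for 'reduction(+ : a[1:2])'
// the private copy covers only the section, so the code above computes
// Adjustment = (address of the base 'a') - (address of a[1]) in elements,
// applies the same (negative) offset to the private copy, and casts the
// result back so later code can address the private storage through the
// original base expression.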

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
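
// For illustration (assumed source): '#pragma omp declare reduction(myadd :
// int : omp_out += omp_in) initializer(omp_priv = 0)' yields two internal
// helpers shaped roughly like
//   void .omp_combiner.(int *restrict omp_out, int *restrict omp_in);
//   void .omp_initializer.(int *restrict omp_priv, int *restrict omp_orig);
// each emitted by the function above, which rebinds In/Out to the two pointer
// parameters before emitting the combiner or initializer expression.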

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
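
// For illustration: for a host function 'foo', these typically produce helper
// names such as 'foo.omp_outlined' and 'foo.omp.reduction.reduction_func';
// the exact separator is platform-specific (see
// OpenMPIRBuilder::createPlatformSpecificName).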

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
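
// E.g. a directive at test.c:10:3 inside 'int main()' yields the ident string
// ";test.c;main;10;3;;".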

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
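
// The returned ident is a pointer to a private global; as a sketch of the IR
// (names illustrative):
//   @str = private unnamed_addr constant [N x i8] c";test.c;main;10;3;;\00"
//   @loc = private unnamed_addr constant %struct.ident_t
//              { i32 0, i32 Flags, i32 Reserved2Flags, i32 N, ptr @str }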

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
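
// In the fallback path this materializes, once per function, a call such as
// (sketch): %tid = call i32 @__kmpc_global_thread_num(ptr @loc)
// at the service insertion point near the allocas, so later queries in the
// same function reuse the cached value instead of calling the runtime again.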
1453
1455 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1456 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1458 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1459 }
1460 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1461 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1462 UDRMap.erase(D);
1463 FunctionUDRMap.erase(CGF.CurFn);
1464 }
1465 auto I = FunctionUDMMap.find(CGF.CurFn);
1466 if (I != FunctionUDMMap.end()) {
1467 for(const auto *D : I->second)
1468 UDMMap.erase(D);
1469 FunctionUDMMap.erase(I);
1470 }
1473}
1474
1476 return OMPBuilder.IdentPtr;
1477}
1478
1480 if (!Kmpc_MicroTy) {
1481 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1482 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1483 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1484 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1485 }
1486 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1487}
1488
1489static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1491 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1492 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1493 if (!DevTy)
1494 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1495
1496 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1497 case OMPDeclareTargetDeclAttr::DT_Host:
1498 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1499 break;
1500 case OMPDeclareTargetDeclAttr::DT_NoHost:
1501 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1502 break;
1503 case OMPDeclareTargetDeclAttr::DT_Any:
1504 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1505 break;
1506 default:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1508 break;
1509 }
1510}
1511
1512static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1514 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1515 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1516 if (!MapType)
1517 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1518 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1519 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1520 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1521 break;
1522 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1523 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1524 break;
1525 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1527 break;
1528 default:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1530 break;
1531 }
1532}
1533
1534static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1535 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1536 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1537
1538 auto FileInfoCallBack = [&]() {
1539 SourceManager &SM = CGM.getContext().getSourceManager();
1540 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1541
1542 llvm::sys::fs::UniqueID ID;
1543 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1544 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1545 }
1546
1547 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1548 };
1549
1550 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1551}
1552
1553ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1554 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1555
1556 auto LinkageForVariable = [&VD, this]() {
1557 return CGM.getLLVMLinkageVarDefinition(VD);
1558 };
1559
1560 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1561
1562 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1563 CGM.getContext().getPointerType(VD->getType()));
1564 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1565 convertCaptureClause(VD), convertDeviceClause(VD),
1566 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1567 VD->isExternallyVisible(),
1568 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1569 VD->getCanonicalDecl()->getBeginLoc()),
1570 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1571 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1572 LinkageForVariable);
1573
1574 if (!addr)
1575 return ConstantAddress::invalid();
1576 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1577}
1578
1579llvm::Constant *
1580CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1581 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1582 !CGM.getContext().getTargetInfo().isTLSSupported());
1583 // Lookup the entry, lazily creating it if necessary.
1584 std::string Suffix = getName({"cache", ""});
1585 return OMPBuilder.getOrCreateInternalVariable(
1586 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1587}
1588
1589Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1590 const VarDecl *VD,
1591 Address VDAddr,
1592 SourceLocation Loc) {
1593 if (CGM.getLangOpts().OpenMPUseTLS &&
1594 CGM.getContext().getTargetInfo().isTLSSupported())
1595 return VDAddr;
1596
1597 llvm::Type *VarTy = VDAddr.getElementType();
1598 llvm::Value *Args[] = {
1599 emitUpdateLocation(CGF, Loc),
1600 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1601 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1602 getOrCreateThreadPrivateCache(VD)};
1603 return Address(
1604 CGF.EmitRuntimeCall(
1605 OMPBuilder.getOrCreateRuntimeFunction(
1606 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1607 Args),
1608 CGF.Int8Ty, VDAddr.getAlignment());
1609}
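// Illustrative sketch (not part of the source): for
//   int x;
//   #pragma omp threadprivate(x)
// each access to 'x' inside a region is redirected through the per-thread
// copy returned by the runtime, roughly
//   %p = call ptr @__kmpc_threadprivate_cached(ptr @<loc>, i32 %gtid, ptr @x,
//                                              i64 4, ptr @x.cache)
// where 4 is sizeof(int) on a typical target and @x.cache is the internal
// cache variable created by getOrCreateThreadPrivateCache above.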
1610
1611void CGOpenMPRuntime::emitThreadPrivateVarInit(
1612 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1613 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1614 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1615 // library.
1616 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1617 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1618 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1619 OMPLoc);
1620 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1621 // to register constructor/destructor for variable.
1622 llvm::Value *Args[] = {
1623 OMPLoc,
1624 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1625 Ctor, CopyCtor, Dtor};
1626 CGF.EmitRuntimeCall(
1627 OMPBuilder.getOrCreateRuntimeFunction(
1628 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1629 Args);
1630}
1631
1632llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1633 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1634 bool PerformInit, CodeGenFunction *CGF) {
1635 if (CGM.getLangOpts().OpenMPUseTLS &&
1636 CGM.getContext().getTargetInfo().isTLSSupported())
1637 return nullptr;
1638
1639 VD = VD->getDefinition(CGM.getContext());
1640 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1641 QualType ASTTy = VD->getType();
1642
1643 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1644 const Expr *Init = VD->getAnyInitializer();
1645 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1646 // Generate function that re-emits the declaration's initializer into the
1647 // threadprivate copy of the variable VD
1648 CodeGenFunction CtorCGF(CGM);
1649 FunctionArgList Args;
1650 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1651 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1652 ImplicitParamKind::Other);
1653 Args.push_back(&Dst);
1654
1655 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1656 CGM.getContext().VoidPtrTy, Args);
1657 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1658 std::string Name = getName({"__kmpc_global_ctor_", ""});
1659 llvm::Function *Fn =
1660 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1661 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1662 Args, Loc, Loc);
1663 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1664 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1665 CGM.getContext().VoidPtrTy, Dst.getLocation());
1666 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1667 VDAddr.getAlignment());
1668 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1669 /*IsInitializer=*/true);
1670 ArgVal = CtorCGF.EmitLoadOfScalar(
1671 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1672 CGM.getContext().VoidPtrTy, Dst.getLocation());
1673 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1674 CtorCGF.FinishFunction();
1675 Ctor = Fn;
1676 }
1677 if (VD->getType().isDestructedType() != QualType::DK_none) {
1678 // Generate function that emits destructor call for the threadprivate copy
1679 // of the variable VD
1680 CodeGenFunction DtorCGF(CGM);
1681 FunctionArgList Args;
1682 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1683 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1684 ImplicitParamKind::Other);
1685 Args.push_back(&Dst);
1686
1687 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1688 CGM.getContext().VoidTy, Args);
1689 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1690 std::string Name = getName({"__kmpc_global_dtor_", ""});
1691 llvm::Function *Fn =
1692 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1693 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1694 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1695 Loc, Loc);
1696 // Create a scope with an artificial location for the body of this function.
1697 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1698 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1699 DtorCGF.GetAddrOfLocalVar(&Dst),
1700 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1701 DtorCGF.emitDestroy(
1702 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1703 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1704 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1705 DtorCGF.FinishFunction();
1706 Dtor = Fn;
1707 }
1708 // Do not emit init function if it is not required.
1709 if (!Ctor && !Dtor)
1710 return nullptr;
1711
1712 // Copying constructor for the threadprivate variable.
1713 // Must be NULL: the parameter is reserved by the runtime, which currently
1714 // requires it to always be NULL and asserts otherwise.
1715 CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1716 if (Ctor == nullptr) {
1717 Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1718 }
1719 if (Dtor == nullptr) {
1720 Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1721 }
1722 if (!CGF) {
1723 auto *InitFunctionTy =
1724 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1725 std::string Name = getName({"__omp_threadprivate_init_", ""});
1726 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1727 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1728 CodeGenFunction InitCGF(CGM);
1729 FunctionArgList ArgList;
1730 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1731 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1732 Loc, Loc);
1733 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1734 InitCGF.FinishFunction();
1735 return InitFunction;
1736 }
1737 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1738 }
1739 return nullptr;
1740}
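// Illustrative sketch (not part of the source): for a C++ threadprivate
// variable with a non-trivial constructor and destructor, e.g.
//   struct S { S(); ~S(); };
//   S s;
//   #pragma omp threadprivate(s)
// the code above emits __kmpc_global_ctor_/__kmpc_global_dtor_ helpers plus a
// global initializer __omp_threadprivate_init_ that registers them via
//   __kmpc_threadprivate_register(&loc, &s, ctor, /*cctor=*/NULL, dtor);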
1741
1742void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1743 llvm::GlobalValue *GV) {
1744 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1745 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1746
1747 // We only need to handle active 'indirect' declare target functions.
1748 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1749 return;
1750
1751 // Get a mangled name to store the new device global in.
1752 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1753 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1754 SmallString<128> Name;
1755 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1756
1757 // We need to generate a new global to hold the address of the indirectly
1758 // called device function. Doing this allows us to keep the visibility and
1759 // linkage of the associated function unchanged while allowing the runtime to
1760 // access its value.
1761 llvm::GlobalValue *Addr = GV;
1762 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1763 Addr = new llvm::GlobalVariable(
1764 CGM.getModule(), CGM.VoidPtrTy,
1765 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1766 nullptr, llvm::GlobalValue::NotThreadLocal,
1767 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1768 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1769 }
1770
1771 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1772 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1773 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1774 llvm::GlobalValue::WeakODRLinkage);
1775}
1776
1777Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1778 QualType VarType,
1779 StringRef Name) {
1780 std::string Suffix = getName({"artificial", ""});
1781 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1782 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1783 VarLVType, Twine(Name).concat(Suffix).str());
1784 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1785 CGM.getContext().getTargetInfo().isTLSSupported()) {
1786 GAddr->setThreadLocal(/*Val=*/true);
1787 return Address(GAddr, GAddr->getValueType(),
1788 CGM.getContext().getTypeAlignInChars(VarType));
1789 }
1790 std::string CacheSuffix = getName({"cache", ""});
1791 llvm::Value *Args[] = {
1792 emitUpdateLocation(CGF, SourceLocation()),
1793 getThreadID(CGF, SourceLocation()),
1794 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1795 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1796 /*isSigned=*/false),
1797 OMPBuilder.getOrCreateInternalVariable(
1798 CGM.VoidPtrPtrTy,
1799 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1800 return Address(
1801 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1802 CGF.EmitRuntimeCall(
1803 OMPBuilder.getOrCreateRuntimeFunction(
1804 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1805 Args),
1806 CGF.Builder.getPtrTy(0)),
1807 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1808}
1809
1810void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1811 const RegionCodeGenTy &ThenGen,
1812 const RegionCodeGenTy &ElseGen) {
1813 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1814
1815 // If the condition constant folds and can be elided, try to avoid emitting
1816 // the condition and the dead arm of the if/else.
1817 bool CondConstant;
1818 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1819 if (CondConstant)
1820 ThenGen(CGF);
1821 else
1822 ElseGen(CGF);
1823 return;
1824 }
1825
1826 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1827 // emit the conditional branch.
1828 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1829 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1830 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1831 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1832
1833 // Emit the 'then' code.
1834 CGF.EmitBlock(ThenBlock);
1835 ThenGen(CGF);
1836 CGF.EmitBranch(ContBlock);
1837 // Emit the 'else' code if present.
1838 // There is no need to emit line number for unconditional branch.
1839 (void)ApplyDebugLocation::CreateEmpty(CGF);
1840 CGF.EmitBlock(ElseBlock);
1841 ElseGen(CGF);
1842 // There is no need to emit line number for unconditional branch.
1843 (void)ApplyDebugLocation::CreateEmpty(CGF);
1844 CGF.EmitBranch(ContBlock);
1845 // Emit the continuation block for code after the if.
1846 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1847}
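// Illustrative note (not part of the source): for
//   #pragma omp parallel if(n > 1)
// emitIfClause produces the omp_if.then/omp_if.else/omp_if.end diamond shown
// above, while a literal condition such as if(0) constant-folds so that only
// the serialized 'else' arm is emitted.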
1848
1849void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1850 llvm::Function *OutlinedFn,
1851 ArrayRef<llvm::Value *> CapturedVars,
1852 const Expr *IfCond,
1853 llvm::Value *NumThreads) {
1854 if (!CGF.HaveInsertPoint())
1855 return;
1856 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1857 auto &M = CGM.getModule();
1858 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1859 this](CodeGenFunction &CGF, PrePostActionTy &) {
1860 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1861 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1862 llvm::Value *Args[] = {
1863 RTLoc,
1864 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1865 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1866 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1867 RealArgs.append(std::begin(Args), std::end(Args));
1868 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1869
1870 llvm::FunctionCallee RTLFn =
1871 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1872 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1873 };
1874 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1875 this](CodeGenFunction &CGF, PrePostActionTy &) {
1876 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1877 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1878 // Build calls:
1879 // __kmpc_serialized_parallel(&Loc, GTid);
1880 llvm::Value *Args[] = {RTLoc, ThreadID};
1881 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1882 M, OMPRTL___kmpc_serialized_parallel),
1883 Args);
1884
1885 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1886 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1887 RawAddress ZeroAddrBound =
1888 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1889 /*Name=*/".bound.zero.addr");
1890 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1891 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1892 // ThreadId for serialized parallels is 0.
1893 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1894 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1895 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1896
1897 // Ensure we do not inline the function. This is trivially true for the ones
1898 // passed to __kmpc_fork_call but the ones called in serialized regions
1899 // could be inlined. This is not perfect but it is closer to the invariant
1900 // we want, namely, every data environment starts with a new function.
1901 // TODO: We should pass the if condition to the runtime function and do the
1902 // handling there. Much cleaner code.
1903 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1904 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1905 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1906
1907 // __kmpc_end_serialized_parallel(&Loc, GTid);
1908 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1910 M, OMPRTL___kmpc_end_serialized_parallel),
1911 EndArgs);
1912 };
1913 if (IfCond) {
1914 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1915 } else {
1916 RegionCodeGenTy ThenRCG(ThenGen);
1917 ThenRCG(CGF);
1918 }
1919}
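// Illustrative sketch (not part of the source): with two captured variables,
// the 'then' arm above emits roughly
//   call void @__kmpc_fork_call(ptr @<loc>, i32 2, ptr @outlined, ptr %a, ptr %b)
// while the 'else' arm calls @outlined directly between
// __kmpc_serialized_parallel and __kmpc_end_serialized_parallel, passing the
// current gtid address and a zero-initialized bound-tid slot.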
1920
1921// If we're inside an (outlined) parallel region, use the region info's
1922// thread-ID variable (it is passed as the first argument of the outlined
1923// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1924// region but in regular serial code, get the thread ID by calling kmp_int32
1925// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1926// temporary and return the address of that temp.
1927Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1928 SourceLocation Loc) {
1929 if (auto *OMPRegionInfo =
1930 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1931 if (OMPRegionInfo->getThreadIDVariable())
1932 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1933
1934 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1935 QualType Int32Ty =
1936 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1937 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1938 CGF.EmitStoreOfScalar(ThreadID,
1939 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1940
1941 return ThreadIDTemp;
1942}
1943
1944llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1945 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1946 std::string Name = getName({Prefix, "var"});
1947 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1948}
1949
1950namespace {
1951/// Common pre(post)-action for different OpenMP constructs.
1952class CommonActionTy final : public PrePostActionTy {
1953 llvm::FunctionCallee EnterCallee;
1954 ArrayRef<llvm::Value *> EnterArgs;
1955 llvm::FunctionCallee ExitCallee;
1956 ArrayRef<llvm::Value *> ExitArgs;
1957 bool Conditional;
1958 llvm::BasicBlock *ContBlock = nullptr;
1959
1960public:
1961 CommonActionTy(llvm::FunctionCallee EnterCallee,
1962 ArrayRef<llvm::Value *> EnterArgs,
1963 llvm::FunctionCallee ExitCallee,
1964 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1965 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1966 ExitArgs(ExitArgs), Conditional(Conditional) {}
1967 void Enter(CodeGenFunction &CGF) override {
1968 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1969 if (Conditional) {
1970 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1971 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1972 ContBlock = CGF.createBasicBlock("omp_if.end");
1973 // Generate the branch (If-stmt)
1974 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1975 CGF.EmitBlock(ThenBlock);
1976 }
1977 }
1978 void Done(CodeGenFunction &CGF) {
1979 // Emit the rest of blocks/branches
1980 CGF.EmitBranch(ContBlock);
1981 CGF.EmitBlock(ContBlock, true);
1982 }
1983 void Exit(CodeGenFunction &CGF) override {
1984 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1985 }
1986};
1987} // anonymous namespace
1988
1989void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1990 StringRef CriticalName,
1991 const RegionCodeGenTy &CriticalOpGen,
1992 SourceLocation Loc, const Expr *Hint) {
1993 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1994 // CriticalOpGen();
1995 // __kmpc_end_critical(ident_t *, gtid, Lock);
1996 // Prepare arguments and build a call to __kmpc_critical
1997 if (!CGF.HaveInsertPoint())
1998 return;
1999 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2000 getCriticalRegionLock(CriticalName)};
2001 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2002 std::end(Args));
2003 if (Hint) {
2004 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2005 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2006 }
2007 CommonActionTy Action(
2008 OMPBuilder.getOrCreateRuntimeFunction(
2009 CGM.getModule(),
2010 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2011 EnterArgs,
2012 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2013 OMPRTL___kmpc_end_critical),
2014 Args);
2015 CriticalOpGen.setAction(Action);
2016 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2017}
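// Illustrative note (not part of the source): for
//   #pragma omp critical(lck) hint(omp_sync_hint_contended)
// the CommonActionTy enter/exit hooks bracket the body with
//   __kmpc_critical_with_hint(&loc, gtid, &lock, hint);
//   ... body ...
//   __kmpc_end_critical(&loc, gtid, &lock);
// where 'lock' is the internal variable built by getCriticalRegionLock from
// the region's name.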
2018
2019void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2020 const RegionCodeGenTy &MasterOpGen,
2021 SourceLocation Loc) {
2022 if (!CGF.HaveInsertPoint())
2023 return;
2024 // if(__kmpc_master(ident_t *, gtid)) {
2025 // MasterOpGen();
2026 // __kmpc_end_master(ident_t *, gtid);
2027 // }
2028 // Prepare arguments and build a call to __kmpc_master
2029 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2030 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2031 CGM.getModule(), OMPRTL___kmpc_master),
2032 Args,
2033 OMPBuilder.getOrCreateRuntimeFunction(
2034 CGM.getModule(), OMPRTL___kmpc_end_master),
2035 Args,
2036 /*Conditional=*/true);
2037 MasterOpGen.setAction(Action);
2038 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2039 Action.Done(CGF);
2040}
2041
2042void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2043 const RegionCodeGenTy &MaskedOpGen,
2044 SourceLocation Loc, const Expr *Filter) {
2045 if (!CGF.HaveInsertPoint())
2046 return;
2047 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2048 // MaskedOpGen();
2049 // __kmpc_end_masked(ident_t *, gtid);
2050 // }
2051 // Prepare arguments and build a call to __kmpc_masked
2052 llvm::Value *FilterVal = Filter
2053 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2054 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2055 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2056 FilterVal};
2057 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2058 getThreadID(CGF, Loc)};
2059 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2060 CGM.getModule(), OMPRTL___kmpc_masked),
2061 Args,
2062 OMPBuilder.getOrCreateRuntimeFunction(
2063 CGM.getModule(), OMPRTL___kmpc_end_masked),
2064 ArgsEnd,
2065 /*Conditional=*/true);
2066 MaskedOpGen.setAction(Action);
2067 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2068 Action.Done(CGF);
2069}
2070
2071void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2072 SourceLocation Loc) {
2073 if (!CGF.HaveInsertPoint())
2074 return;
2075 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2076 OMPBuilder.createTaskyield(CGF.Builder);
2077 } else {
2078 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2079 llvm::Value *Args[] = {
2080 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2081 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2082 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2083 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2084 Args);
2085 }
2086
2087 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2088 Region->emitUntiedSwitch(CGF);
2089}
2090
2091void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2092 const RegionCodeGenTy &TaskgroupOpGen,
2093 SourceLocation Loc) {
2094 if (!CGF.HaveInsertPoint())
2095 return;
2096 // __kmpc_taskgroup(ident_t *, gtid);
2097 // TaskgroupOpGen();
2098 // __kmpc_end_taskgroup(ident_t *, gtid);
2099 // Prepare arguments and build a call to __kmpc_taskgroup
2100 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2101 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2102 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2103 Args,
2104 OMPBuilder.getOrCreateRuntimeFunction(
2105 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2106 Args);
2107 TaskgroupOpGen.setAction(Action);
2108 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2109}
2110
2111/// Given an array of pointers to variables, project the address of a
2112/// given variable.
2113static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2114 unsigned Index, const VarDecl *Var) {
2115 // Pull out the pointer to the variable.
2116 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2117 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2118
2119 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2120 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2121}
2122
2123static llvm::Value *emitCopyprivateCopyFunction(
2124 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2125 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2126 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2127 SourceLocation Loc) {
2128 ASTContext &C = CGM.getContext();
2129 // void copy_func(void *LHSArg, void *RHSArg);
2130 FunctionArgList Args;
2131 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2132 ImplicitParamKind::Other);
2133 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2134 ImplicitParamKind::Other);
2135 Args.push_back(&LHSArg);
2136 Args.push_back(&RHSArg);
2137 const auto &CGFI =
2138 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2139 std::string Name =
2140 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2141 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2142 llvm::GlobalValue::InternalLinkage, Name,
2143 &CGM.getModule());
2144 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2145 Fn->setDoesNotRecurse();
2146 CodeGenFunction CGF(CGM);
2147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2148 // Dest = (void*[n])(LHSArg);
2149 // Src = (void*[n])(RHSArg);
2150 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2151 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2152 CGF.Builder.getPtrTy(0)),
2153 ArgsElemType, CGF.getPointerAlign());
2154 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2155 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2156 CGF.Builder.getPtrTy(0)),
2157 ArgsElemType, CGF.getPointerAlign());
2158 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2159 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2160 // ...
2161 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2162 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2163 const auto *DestVar =
2164 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2165 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2166
2167 const auto *SrcVar =
2168 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2169 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2170
2171 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2172 QualType Type = VD->getType();
2173 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2174 }
2175 CGF.FinishFunction();
2176 return Fn;
2177}
2178
2179void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2180 const RegionCodeGenTy &SingleOpGen,
2181 SourceLocation Loc,
2182 ArrayRef<const Expr *> CopyprivateVars,
2183 ArrayRef<const Expr *> SrcExprs,
2184 ArrayRef<const Expr *> DstExprs,
2185 ArrayRef<const Expr *> AssignmentOps) {
2186 if (!CGF.HaveInsertPoint())
2187 return;
2188 assert(CopyprivateVars.size() == SrcExprs.size() &&
2189 CopyprivateVars.size() == DstExprs.size() &&
2190 CopyprivateVars.size() == AssignmentOps.size());
2191 ASTContext &C = CGM.getContext();
2192 // int32 did_it = 0;
2193 // if(__kmpc_single(ident_t *, gtid)) {
2194 // SingleOpGen();
2195 // __kmpc_end_single(ident_t *, gtid);
2196 // did_it = 1;
2197 // }
2198 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2199 // <copy_func>, did_it);
2200
2201 Address DidIt = Address::invalid();
2202 if (!CopyprivateVars.empty()) {
2203 // int32 did_it = 0;
2204 QualType KmpInt32Ty =
2205 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2206 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2207 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2208 }
2209 // Prepare arguments and build a call to __kmpc_single
2210 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2211 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2212 CGM.getModule(), OMPRTL___kmpc_single),
2213 Args,
2214 OMPBuilder.getOrCreateRuntimeFunction(
2215 CGM.getModule(), OMPRTL___kmpc_end_single),
2216 Args,
2217 /*Conditional=*/true);
2218 SingleOpGen.setAction(Action);
2219 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2220 if (DidIt.isValid()) {
2221 // did_it = 1;
2222 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2223 }
2224 Action.Done(CGF);
2225 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2226 // <copy_func>, did_it);
2227 if (DidIt.isValid()) {
2228 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2229 QualType CopyprivateArrayTy = C.getConstantArrayType(
2230 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2231 /*IndexTypeQuals=*/0);
2232 // Create a list of all private variables for copyprivate.
2233 Address CopyprivateList =
2234 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2235 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2236 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2237 CGF.Builder.CreateStore(
2238 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2239 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2240 CGF.VoidPtrTy),
2241 Elem);
2242 }
2243 // Build a function that copies private values from the single region to
2244 // all other threads in the corresponding parallel region.
2245 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2246 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2247 SrcExprs, DstExprs, AssignmentOps, Loc);
2248 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2249 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2250 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2251 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2252 llvm::Value *Args[] = {
2253 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2254 getThreadID(CGF, Loc), // i32 <gtid>
2255 BufSize, // size_t <buf_size>
2256 CL.emitRawPointer(CGF), // void *<copyprivate list>
2257 CpyFn, // void (*) (void *, void *) <copy_func>
2258 DidItVal // i32 did_it
2259 };
2260 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2261 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2262 Args);
2263 }
2264}
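// Illustrative note (not part of the source): for
//   #pragma omp single copyprivate(x)
// the winning thread executes the body under __kmpc_single/__kmpc_end_single
// and sets did_it = 1; afterwards every thread calls, roughly,
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, &list, copy_func, did_it);
// so the generated copy_func broadcasts x from the single region to the other
// threads' copies.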
2265
2266void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2267 const RegionCodeGenTy &OrderedOpGen,
2268 SourceLocation Loc, bool IsThreads) {
2269 if (!CGF.HaveInsertPoint())
2270 return;
2271 // __kmpc_ordered(ident_t *, gtid);
2272 // OrderedOpGen();
2273 // __kmpc_end_ordered(ident_t *, gtid);
2274 // Prepare arguments and build a call to __kmpc_ordered
2275 if (IsThreads) {
2276 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2277 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2278 CGM.getModule(), OMPRTL___kmpc_ordered),
2279 Args,
2280 OMPBuilder.getOrCreateRuntimeFunction(
2281 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2282 Args);
2283 OrderedOpGen.setAction(Action);
2284 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2285 return;
2286 }
2287 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2288}
2289
2290static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2291 unsigned Flags;
2292 if (Kind == OMPD_for)
2293 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2294 else if (Kind == OMPD_sections)
2295 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2296 else if (Kind == OMPD_single)
2297 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2298 else if (Kind == OMPD_barrier)
2299 Flags = OMP_IDENT_BARRIER_EXPL;
2300 else
2301 Flags = OMP_IDENT_BARRIER_IMPL;
2302 return Flags;
2303}
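// Illustrative note (not part of the source): the implicit barrier at the end
// of '#pragma omp for' is therefore emitted as a __kmpc_barrier call whose
// ident_t flags contain OMP_IDENT_BARRIER_IMPL_FOR, letting the runtime
// distinguish it from an explicit '#pragma omp barrier'
// (OMP_IDENT_BARRIER_EXPL).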
2304
2305void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2306 CodeGenFunction &CGF, const OMPLoopDirective &S,
2307 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2308 // Check if the loop directive is actually a doacross loop directive. In
2309 // that case, choose schedule(static, 1).
2310 if (llvm::any_of(
2311 S.getClausesOfKind<OMPOrderedClause>(),
2312 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2313 ScheduleKind = OMPC_SCHEDULE_static;
2314 // Chunk size is 1 in this case.
2315 llvm::APInt ChunkSize(32, 1);
2316 ChunkExpr = IntegerLiteral::Create(
2317 CGF.getContext(), ChunkSize,
2318 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2319 SourceLocation());
2320 }
2321}
2322
2323void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2324 OpenMPDirectiveKind Kind, bool EmitChecks,
2325 bool ForceSimpleCall) {
2326 // Check if we should use the OMPBuilder
2327 auto *OMPRegionInfo =
2328 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2329 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2331 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2332 EmitChecks));
2333 CGF.Builder.restoreIP(AfterIP);
2334 return;
2335 }
2336
2337 if (!CGF.HaveInsertPoint())
2338 return;
2339 // Build call __kmpc_cancel_barrier(loc, thread_id) or
2340 // __kmpc_barrier(loc, thread_id);
2341 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2344 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2345 getThreadID(CGF, Loc)};
2346 if (OMPRegionInfo) {
2347 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2348 llvm::Value *Result = CGF.EmitRuntimeCall(
2349 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2350 OMPRTL___kmpc_cancel_barrier),
2351 Args);
2352 if (EmitChecks) {
2353 // if (__kmpc_cancel_barrier()) {
2354 // exit from construct;
2355 // }
2356 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2357 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2358 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2359 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2360 CGF.EmitBlock(ExitBB);
2361 // exit from construct;
2362 CodeGenFunction::JumpDest CancelDestination =
2363 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2364 CGF.EmitBranchThroughCleanup(CancelDestination);
2365 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2366 }
2367 return;
2368 }
2369 }
2370 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2371 CGM.getModule(), OMPRTL___kmpc_barrier),
2372 Args);
2373}
2374
2375void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2376 Expr *ME, bool IsFatal) {
2377 llvm::Value *MVL =
2378 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2379 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2380 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2381 // *message)
2382 llvm::Value *Args[] = {
2383 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2384 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2385 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2386 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2387 CGM.getModule(), OMPRTL___kmpc_error),
2388 Args);
2389}
2390
2391/// Map the OpenMP loop schedule to the runtime enumeration.
2392static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2393 bool Chunked, bool Ordered) {
2394 switch (ScheduleKind) {
2395 case OMPC_SCHEDULE_static:
2396 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2397 : (Ordered ? OMP_ord_static : OMP_sch_static);
2398 case OMPC_SCHEDULE_dynamic:
2399 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2400 case OMPC_SCHEDULE_guided:
2401 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2402 case OMPC_SCHEDULE_runtime:
2403 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2404 case OMPC_SCHEDULE_auto:
2405 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2406 case OMPC_SCHEDULE_unknown:
2407 assert(!Chunked && "chunk was specified but schedule kind not known");
2408 return Ordered ? OMP_ord_static : OMP_sch_static;
2409 }
2410 llvm_unreachable("Unexpected runtime schedule");
2411}
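// Illustrative note (not part of the source): under this mapping, e.g.
//   schedule(dynamic, 4)           -> OMP_sch_dynamic_chunked
//   schedule(static) with ordered  -> OMP_ord_static
// and an absent or unknown schedule kind defaults to the static variants.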
2412
2413/// Map the OpenMP distribute schedule to the runtime enumeration.
2414static OpenMPSchedType
2415getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2416 // Only static is allowed for dist_schedule.
2417 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2418}
2419
2420bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2421 bool Chunked) const {
2422 OpenMPSchedType Schedule =
2423 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2424 return Schedule == OMP_sch_static;
2425}
2426
2427bool CGOpenMPRuntime::isStaticNonchunked(
2428 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2429 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2430 return Schedule == OMP_dist_sch_static;
2431}
2432
2433bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2434 bool Chunked) const {
2435 OpenMPSchedType Schedule =
2436 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2437 return Schedule == OMP_sch_static_chunked;
2438}
2439
2440bool CGOpenMPRuntime::isStaticChunked(
2441 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2442 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2443 return Schedule == OMP_dist_sch_static_chunked;
2444}
2445
2446bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2447 OpenMPSchedType Schedule =
2448 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2449 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2450 return Schedule != OMP_sch_static;
2451}
2452
2453static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2454 OpenMPScheduleClauseModifier M1,
2455 OpenMPScheduleClauseModifier M2) {
2456 int Modifier = 0;
2457 switch (M1) {
2458 case OMPC_SCHEDULE_MODIFIER_monotonic:
2459 Modifier = OMP_sch_modifier_monotonic;
2460 break;
2461 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2462 Modifier = OMP_sch_modifier_nonmonotonic;
2463 break;
2464 case OMPC_SCHEDULE_MODIFIER_simd:
2465 if (Schedule == OMP_sch_static_chunked)
2466 Schedule = OMP_sch_static_balanced_chunked;
2467 break;
2468 case OMPC_SCHEDULE_MODIFIER_last:
2469 case OMPC_SCHEDULE_MODIFIER_unknown:
2470 break;
2471 }
2472 switch (M2) {
2473 case OMPC_SCHEDULE_MODIFIER_monotonic:
2474 Modifier = OMP_sch_modifier_monotonic;
2475 break;
2476 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2477 Modifier = OMP_sch_modifier_nonmonotonic;
2478 break;
2479 case OMPC_SCHEDULE_MODIFIER_simd:
2480 if (Schedule == OMP_sch_static_chunked)
2481 Schedule = OMP_sch_static_balanced_chunked;
2482 break;
2483 case OMPC_SCHEDULE_MODIFIER_last:
2484 case OMPC_SCHEDULE_MODIFIER_unknown:
2485 break;
2486 }
2487 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2488 // If the static schedule kind is specified or if the ordered clause is
2489 // specified, and if the nonmonotonic modifier is not specified, the effect is
2490 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2491 // modifier is specified, the effect is as if the nonmonotonic modifier is
2492 // specified.
2493 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2494 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2495 Schedule == OMP_sch_static_balanced_chunked ||
2496 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2497 Schedule == OMP_dist_sch_static_chunked ||
2498 Schedule == OMP_dist_sch_static))
2499 Modifier = OMP_sch_modifier_nonmonotonic;
2500 }
2501 return Schedule | Modifier;
2502}
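// Illustrative note (not part of the source): the returned value packs the
// modifier bits into the schedule word with a bitwise OR, e.g. under
// OpenMP >= 5.0
//   schedule(dynamic, 4) -> OMP_sch_dynamic_chunked |
//                           OMP_sch_modifier_nonmonotonic
// because dynamic is neither static nor ordered and no explicit modifier was
// written.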
2503
2504void CGOpenMPRuntime::emitForDispatchInit(
2505 CodeGenFunction &CGF, SourceLocation Loc,
2506 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2507 bool Ordered, const DispatchRTInput &DispatchValues) {
2508 if (!CGF.HaveInsertPoint())
2509 return;
2510 OpenMPSchedType Schedule = getRuntimeSchedule(
2511 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2512 assert(Ordered ||
2513 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2514 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2515 Schedule != OMP_sch_static_balanced_chunked));
2516 // Call __kmpc_dispatch_init(
2517 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2518 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2519 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2520
2521 // If the chunk was not specified in the clause, use default value 1.
2522 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2523 : CGF.Builder.getIntN(IVSize, 1);
2524 llvm::Value *Args[] = {
2525 emitUpdateLocation(CGF, Loc),
2526 getThreadID(CGF, Loc),
2527 CGF.Builder.getInt32(addMonoNonMonoModifier(
2528 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2529 DispatchValues.LB, // Lower
2530 DispatchValues.UB, // Upper
2531 CGF.Builder.getIntN(IVSize, 1), // Stride
2532 Chunk // Chunk
2533 };
2534 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2535 Args);
2536}
2537
2538void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2539 SourceLocation Loc) {
2540 if (!CGF.HaveInsertPoint())
2541 return;
2542 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2543 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2544 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2545}
2546
2547static void emitForStaticInitCall(
2548 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2549 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2550 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2551 const CGOpenMPRuntime::StaticRTInput &Values) {
2552 if (!CGF.HaveInsertPoint())
2553 return;
2554
2555 assert(!Values.Ordered);
2556 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2557 Schedule == OMP_sch_static_balanced_chunked ||
2558 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2559 Schedule == OMP_dist_sch_static ||
2560 Schedule == OMP_dist_sch_static_chunked);
2561
2562 // Call __kmpc_for_static_init(
2563 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2564 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2565 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2566 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2567 llvm::Value *Chunk = Values.Chunk;
2568 if (Chunk == nullptr) {
2569 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2570 Schedule == OMP_dist_sch_static) &&
2571 "expected static non-chunked schedule");
2572 // If the chunk was not specified in the clause, use default value 1.
2573 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2574 } else {
2575 assert((Schedule == OMP_sch_static_chunked ||
2576 Schedule == OMP_sch_static_balanced_chunked ||
2577 Schedule == OMP_ord_static_chunked ||
2578 Schedule == OMP_dist_sch_static_chunked) &&
2579 "expected static chunked schedule");
2580 }
2581 llvm::Value *Args[] = {
2582 UpdateLocation,
2583 ThreadId,
2584 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2585 M2)), // Schedule type
2586 Values.IL.emitRawPointer(CGF), // &isLastIter
2587 Values.LB.emitRawPointer(CGF), // &LB
2588 Values.UB.emitRawPointer(CGF), // &UB
2589 Values.ST.emitRawPointer(CGF), // &Stride
2590 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2591 Chunk // Chunk
2592 };
2593 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2594}
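// Illustrative sketch (not part of the source): for
//   #pragma omp for schedule(static)
// this helper emits roughly
//   call void @__kmpc_for_static_init_4(ptr @<loc>, i32 %gtid, i32 34,
//       ptr %is.last, ptr %lb, ptr %ub, ptr %stride, i32 1, i32 1)
// where 34 encodes OMP_sch_static and the runtime fills this thread's
// [lb, ub] chunk in place; the matching __kmpc_for_static_fini is emitted by
// emitForStaticFinish below.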
2595
2596void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2597 SourceLocation Loc,
2598 OpenMPDirectiveKind DKind,
2599 const OpenMPScheduleTy &ScheduleKind,
2600 const StaticRTInput &Values) {
2601 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2602 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2603 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2604 "Expected loop-based or sections-based directive.");
2605 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2606 isOpenMPLoopDirective(DKind)
2607 ? OMP_IDENT_WORK_LOOP
2608 : OMP_IDENT_WORK_SECTIONS);
2609 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2610 llvm::FunctionCallee StaticInitFunction =
2611 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2612 false);
2613 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2614 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2615 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2616}
2617
2618void CGOpenMPRuntime::emitDistributeStaticInit(
2619 CodeGenFunction &CGF, SourceLocation Loc,
2620 OpenMPDistScheduleClauseKind SchedKind,
2621 const CGOpenMPRuntime::StaticRTInput &Values) {
2622 OpenMPSchedType ScheduleNum =
2623 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2624 llvm::Value *UpdatedLocation =
2625 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2626 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2627 llvm::FunctionCallee StaticInitFunction;
2628 bool isGPUDistribute =
2629 CGM.getLangOpts().OpenMPIsTargetDevice &&
2630 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2631 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2632 Values.IVSize, Values.IVSigned, isGPUDistribute);
2633
2634 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2635 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2636 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2637}
2638
2639void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2640 SourceLocation Loc,
2641 OpenMPDirectiveKind DKind) {
2642 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2643 DKind == OMPD_sections) &&
2644 "Expected distribute, for, or sections directive kind");
2645 if (!CGF.HaveInsertPoint())
2646 return;
2647 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2648 llvm::Value *Args[] = {
2649 emitUpdateLocation(CGF, Loc,
2650 isOpenMPDistributeDirective(DKind) ||
2651 (DKind == OMPD_target_teams_loop)
2652 ? OMP_IDENT_WORK_DISTRIBUTE
2653 : isOpenMPLoopDirective(DKind)
2654 ? OMP_IDENT_WORK_LOOP
2655 : OMP_IDENT_WORK_SECTIONS),
2656 getThreadID(CGF, Loc)};
2657 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2658 if (isOpenMPDistributeDirective(DKind) &&
2659 CGM.getLangOpts().OpenMPIsTargetDevice &&
2660 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2661 CGF.EmitRuntimeCall(
2662 OMPBuilder.getOrCreateRuntimeFunction(
2663 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2664 Args);
2665 else
2666 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2667 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2668 Args);
2669}
2670
2671void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2672 SourceLocation Loc,
2673 unsigned IVSize,
2674 bool IVSigned) {
2675 if (!CGF.HaveInsertPoint())
2676 return;
2677 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2678 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2679 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2680 Args);
2681}
2682
2683llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2684 SourceLocation Loc, unsigned IVSize,
2685 bool IVSigned, Address IL,
2686 Address LB, Address UB,
2687 Address ST) {
2688 // Call __kmpc_dispatch_next(
2689 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2690 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2691 // kmp_int[32|64] *p_stride);
2692 llvm::Value *Args[] = {
2693 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2694 IL.emitRawPointer(CGF), // &isLastIter
2695 LB.emitRawPointer(CGF), // &Lower
2696 UB.emitRawPointer(CGF), // &Upper
2697 ST.emitRawPointer(CGF) // &Stride
2698 };
2699 llvm::Value *Call = CGF.EmitRuntimeCall(
2700 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2701 return CGF.EmitScalarConversion(
2702 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2703 CGF.getContext().BoolTy, Loc);
2704}
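// Illustrative note (not part of the source): combined with
// emitForDispatchInit above, a dynamically scheduled loop lowers roughly to
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; i += st) body(i);
// with emitForNext producing the boolean result of each
// __kmpc_dispatch_next call.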
2705
2706void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2707 llvm::Value *NumThreads,
2708 SourceLocation Loc) {
2709 if (!CGF.HaveInsertPoint())
2710 return;
2711 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2712 llvm::Value *Args[] = {
2713 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2714 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2715 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2716 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2717 Args);
2718}
2719
2720void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2721 ProcBindKind ProcBind,
2722 SourceLocation Loc) {
2723 if (!CGF.HaveInsertPoint())
2724 return;
2725 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2726 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2727 llvm::Value *Args[] = {
2728 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2729 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2730 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2731 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2732 Args);
2733}
2734
2735void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2736 SourceLocation Loc, llvm::AtomicOrdering AO) {
2737 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2738 OMPBuilder.createFlush(CGF.Builder);
2739 } else {
2740 if (!CGF.HaveInsertPoint())
2741 return;
2742 // Build call void __kmpc_flush(ident_t *loc)
2743 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2744 CGM.getModule(), OMPRTL___kmpc_flush),
2745 emitUpdateLocation(CGF, Loc));
2746 }
2747}
2748
2749namespace {
2750/// Indexes of fields for type kmp_task_t.
2751enum KmpTaskTFields {
2752 /// List of shared variables.
2753 KmpTaskTShareds,
2754 /// Task routine.
2755 KmpTaskTRoutine,
2756 /// Partition id for the untied tasks.
2757 KmpTaskTPartId,
2758 /// Function with call of destructors for private variables.
2759 Data1,
2760 /// Task priority.
2761 Data2,
2762 /// (Taskloops only) Lower bound.
2763 KmpTaskTLowerBound,
2764 /// (Taskloops only) Upper bound.
2765 KmpTaskTUpperBound,
2766 /// (Taskloops only) Stride.
2767 KmpTaskTStride,
2768 /// (Taskloops only) Is last iteration flag.
2769 KmpTaskTLastIter,
2770 /// (Taskloops only) Reduction data.
2771 KmpTaskTReductions,
2772};
2773} // anonymous namespace
2774
2775void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2776 // If we are in simd mode or there are no entries, we don't need to do
2777 // anything.
2778 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2779 return;
2780
2781 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2782 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2783 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2784 SourceLocation Loc;
2785 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2786 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2787 E = CGM.getContext().getSourceManager().fileinfo_end();
2788 I != E; ++I) {
2789 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2790 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2791 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2792 I->getFirst(), EntryInfo.Line, 1);
2793 break;
2794 }
2795 }
2796 }
2797 switch (Kind) {
2798 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2799 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2800 DiagnosticsEngine::Error, "Offloading entry for target region in "
2801 "%0 is incorrect: either the "
2802 "address or the ID is invalid.");
2803 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2804 } break;
2805 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2806 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2807 DiagnosticsEngine::Error, "Offloading entry for declare target "
2808 "variable %0 is incorrect: the "
2809 "address is invalid.");
2810 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2811 } break;
2812 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2813 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2814 DiagnosticsEngine::Error,
2815 "Offloading entry for declare target variable is incorrect: the "
2816 "address is invalid.");
2817 CGM.getDiags().Report(DiagID);
2818 } break;
2819 }
2820 };
2821
2822 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2823}
2824
2825void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2826 if (!KmpRoutineEntryPtrTy) {
2827 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2828 ASTContext &C = CGM.getContext();
2829 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2830 FunctionProtoType::ExtProtoInfo EPI;
2831 KmpRoutineEntryPtrQTy = C.getPointerType(
2832 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2833 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2834 }
2835}
2836
2837namespace {
2838struct PrivateHelpersTy {
2839 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2840 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2841 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2842 PrivateElemInit(PrivateElemInit) {}
2843 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2844 const Expr *OriginalRef = nullptr;
2845 const VarDecl *Original = nullptr;
2846 const VarDecl *PrivateCopy = nullptr;
2847 const VarDecl *PrivateElemInit = nullptr;
2848 bool isLocalPrivate() const {
2849 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2850 }
2851};
2852typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2853} // anonymous namespace
2854
2855static bool isAllocatableDecl(const VarDecl *VD) {
2856 const VarDecl *CVD = VD->getCanonicalDecl();
2857 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2858 return false;
2859 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2860 // Use the default allocation.
2861 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2862 !AA->getAllocator());
2863}
2864
2865static RecordDecl *
2866createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2867 if (!Privates.empty()) {
2868 ASTContext &C = CGM.getContext();
2869 // Build struct .kmp_privates_t. {
2870 // /* private vars */
2871 // };
2872 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2873 RD->startDefinition();
2874 for (const auto &Pair : Privates) {
2875 const VarDecl *VD = Pair.second.Original;
2876 QualType Type = VD->getType().getNonReferenceType();
2877 // If the private variable is a local variable with lvalue ref type,
2878 // allocate the pointer instead of the pointee type.
2879 if (Pair.second.isLocalPrivate()) {
2880 if (VD->getType()->isLValueReferenceType())
2881 Type = C.getPointerType(Type);
2882 if (isAllocatableDecl(VD))
2883 Type = C.getPointerType(Type);
2884 }
2885 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2886 if (VD->hasAttrs()) {
2887 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2888 E(VD->getAttrs().end());
2889 I != E; ++I)
2890 FD->addAttr(*I);
2891 }
2892 }
2893 RD->completeDefinition();
2894 return RD;
2895 }
2896 return nullptr;
2897}
2898
2899static RecordDecl *
2900createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2901 QualType KmpInt32Ty,
2902 QualType KmpRoutineEntryPointerQTy) {
2903 ASTContext &C = CGM.getContext();
2904 // Build struct kmp_task_t {
2905 // void * shareds;
2906 // kmp_routine_entry_t routine;
2907 // kmp_int32 part_id;
2908 // kmp_cmplrdata_t data1;
2909 // kmp_cmplrdata_t data2;
2910 // For taskloops additional fields:
2911 // kmp_uint64 lb;
2912 // kmp_uint64 ub;
2913 // kmp_int64 st;
2914 // kmp_int32 liter;
2915 // void * reductions;
2916 // };
2917 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2918 UD->startDefinition();
2919 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2920 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2921 UD->completeDefinition();
2922 QualType KmpCmplrdataTy = C.getRecordType(UD);
2923 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2924 RD->startDefinition();
2925 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2926 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2927 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2928 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2929 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2930 if (isOpenMPTaskLoopDirective(Kind)) {
2931 QualType KmpUInt64Ty =
2932 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2933 QualType KmpInt64Ty =
2934 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2935 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2936 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2937 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2938 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2939 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2940 }
2941 RD->completeDefinition();
2942 return RD;
2943}
2944
2945static RecordDecl *
2946createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2947 ArrayRef<PrivateDataTy> Privates) {
2948 ASTContext &C = CGM.getContext();
2949 // Build struct kmp_task_t_with_privates {
2950 // kmp_task_t task_data;
2951 // .kmp_privates_t. privates;
2952 // };
2953 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2954 RD->startDefinition();
2955 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2956 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2957 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2958 RD->completeDefinition();
2959 return RD;
2960}
2961
2962/// Emit a proxy function which accepts kmp_task_t as the second
2963/// argument.
2964/// \code
2965/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2966/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2967/// For taskloops:
2968/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2969/// tt->reductions, tt->shareds);
2970/// return 0;
2971/// }
2972/// \endcode
2973static llvm::Function *
2974emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2975 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2976 QualType KmpTaskTWithPrivatesPtrQTy,
2977 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2978 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2979 llvm::Value *TaskPrivatesMap) {
2980 ASTContext &C = CGM.getContext();
2981 FunctionArgList Args;
2982 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2983 ImplicitParamKind::Other);
2984 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2985 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2986 ImplicitParamKind::Other);
2987 Args.push_back(&GtidArg);
2988 Args.push_back(&TaskTypeArg);
2989 const auto &TaskEntryFnInfo =
2990 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2991 llvm::FunctionType *TaskEntryTy =
2992 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2993 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
2994 auto *TaskEntry = llvm::Function::Create(
2995 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
2996 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
2997 TaskEntry->setDoesNotRecurse();
2998 CodeGenFunction CGF(CGM);
2999 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3000 Loc, Loc);
3001
3002 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3003 // tt,
3004 // For taskloops:
3005 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3006 // tt->task_data.shareds);
3007 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3008 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3009 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3010 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3011 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3012 const auto *KmpTaskTWithPrivatesQTyRD =
3013 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3014 LValue Base =
3015 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3016 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3017 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3018 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3019 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3020
3021 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3022 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3023 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3024 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3025 CGF.ConvertTypeForMem(SharedsPtrTy));
3026
3027 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3028 llvm::Value *PrivatesParam;
3029 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3030 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3031 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3032 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3033 } else {
3034 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3035 }
3036
3037 llvm::Value *CommonArgs[] = {
3038 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3039 CGF.Builder
3040 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3041 CGF.VoidPtrTy, CGF.Int8Ty)
3042 .emitRawPointer(CGF)};
3043 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3044 std::end(CommonArgs));
3045 if (isOpenMPTaskLoopDirective(Kind)) {
3046 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3047 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3048 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3049 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3050 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3051 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3052 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3053 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3054 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3055 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3056 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3057 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3058 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3059 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3060 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3061 CallArgs.push_back(LBParam);
3062 CallArgs.push_back(UBParam);
3063 CallArgs.push_back(StParam);
3064 CallArgs.push_back(LIParam);
3065 CallArgs.push_back(RParam);
3066 }
3067 CallArgs.push_back(SharedsParam);
3068
3069 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3070 CallArgs);
3071 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3072 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3073 CGF.FinishFunction();
3074 return TaskEntry;
3075}
3076
3077static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3078 SourceLocation Loc,
3079 QualType KmpInt32Ty,
3080 QualType KmpTaskTWithPrivatesPtrQTy,
3081 QualType KmpTaskTWithPrivatesQTy) {
3082 ASTContext &C = CGM.getContext();
3083 FunctionArgList Args;
3084 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3085 ImplicitParamKind::Other);
3086 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3087 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3088 ImplicitParamKind::Other);
3089 Args.push_back(&GtidArg);
3090 Args.push_back(&TaskTypeArg);
3091 const auto &DestructorFnInfo =
3092 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3093 llvm::FunctionType *DestructorFnTy =
3094 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3095 std::string Name =
3096 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3097 auto *DestructorFn =
3098 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3099 Name, &CGM.getModule());
3100 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3101 DestructorFnInfo);
3102 DestructorFn->setDoesNotRecurse();
3103 CodeGenFunction CGF(CGM);
3104 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3105 Args, Loc, Loc);
3106
3107 LValue Base = CGF.EmitLoadOfPointerLValue(
3108 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3109 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3110 const auto *KmpTaskTWithPrivatesQTyRD =
3111 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3112 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3113 Base = CGF.EmitLValueForField(Base, *FI);
3114 for (const auto *Field :
3115 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3116 if (QualType::DestructionKind DtorKind =
3117 Field->getType().isDestructedType()) {
3118 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3119 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3120 }
3121 }
3122 CGF.FinishFunction();
3123 return DestructorFn;
3124}
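// Roughly, the helper above produces (illustrative shape; the real body walks
// the fields of the privates record and pushes a destroy cleanup for each
// non-trivially-destructible one):
//
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     tt->privates.<field>.~T();  // for every field with a destructor
//     return 0;
//   }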
3125
3126/// Emit a privates mapping function for correct handling of private and
3127/// firstprivate variables.
3128/// \code
3129/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3130/// **noalias priv1,..., <tyn> **noalias privn) {
3131/// *priv1 = &.privates.priv1;
3132/// ...;
3133/// *privn = &.privates.privn;
3134/// }
3135/// \endcode
3136static llvm::Value *
3137emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3138 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3139 ArrayRef<PrivateDataTy> Privates) {
3140 ASTContext &C = CGM.getContext();
3141 FunctionArgList Args;
3142 ImplicitParamDecl TaskPrivatesArg(
3143 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3144 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3145 ImplicitParamKind::Other);
3146 Args.push_back(&TaskPrivatesArg);
3147 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3148 unsigned Counter = 1;
3149 for (const Expr *E : Data.PrivateVars) {
3150 Args.push_back(ImplicitParamDecl::Create(
3151 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3152 C.getPointerType(C.getPointerType(E->getType()))
3153 .withConst()
3154 .withRestrict(),
3155 ImplicitParamKind::Other));
3156 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3157 PrivateVarsPos[VD] = Counter;
3158 ++Counter;
3159 }
3160 for (const Expr *E : Data.FirstprivateVars) {
3161 Args.push_back(ImplicitParamDecl::Create(
3162 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3163 C.getPointerType(C.getPointerType(E->getType()))
3164 .withConst()
3165 .withRestrict(),
3166 ImplicitParamKind::Other));
3167 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3168 PrivateVarsPos[VD] = Counter;
3169 ++Counter;
3170 }
3171 for (const Expr *E : Data.LastprivateVars) {
3172 Args.push_back(ImplicitParamDecl::Create(
3173 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3174 C.getPointerType(C.getPointerType(E->getType()))
3175 .withConst()
3176 .withRestrict(),
3177 ImplicitParamKind::Other));
3178 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3179 PrivateVarsPos[VD] = Counter;
3180 ++Counter;
3181 }
3182 for (const VarDecl *VD : Data.PrivateLocals) {
3183 QualType Ty = VD->getType().getNonReferenceType();
3184 if (VD->getType()->isLValueReferenceType())
3185 Ty = C.getPointerType(Ty);
3186 if (isAllocatableDecl(VD))
3187 Ty = C.getPointerType(Ty);
3188 Args.push_back(ImplicitParamDecl::Create(
3189 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3190 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3191 ImplicitParamKind::Other));
3192 PrivateVarsPos[VD] = Counter;
3193 ++Counter;
3194 }
3195 const auto &TaskPrivatesMapFnInfo =
3196 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3197 llvm::FunctionType *TaskPrivatesMapTy =
3198 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3199 std::string Name =
3200 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3201 auto *TaskPrivatesMap = llvm::Function::Create(
3202 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3203 &CGM.getModule());
3204 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3205 TaskPrivatesMapFnInfo);
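// The mapping function is tiny and sits on the hot path of every task, so
// when optimizing drop the default noinline/optnone attributes and force
// inlining instead.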
3206 if (CGM.getLangOpts().Optimize) {
3207 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3208 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3209 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3210 }
3211 CodeGenFunction CGF(CGM);
3212 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3213 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3214
3215 // *privi = &.privates.privi;
3216 LValue Base = CGF.EmitLoadOfPointerLValue(
3217 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3218 TaskPrivatesArg.getType()->castAs<PointerType>());
3219 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3220 Counter = 0;
3221 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3222 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3223 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3224 LValue RefLVal =
3225 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3226 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3227 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3228 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3229 ++Counter;
3230 }
3231 CGF.FinishFunction();
3232 return TaskPrivatesMap;
3233}
3234
3235/// Emit initialization for private variables in task-based directives.
3236static void emitPrivatesInit(CodeGenFunction &CGF,
3237 const OMPExecutableDirective &D,
3238 Address KmpTaskSharedsPtr, LValue TDBase,
3239 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3240 QualType SharedsTy, QualType SharedsPtrTy,
3241 const OMPTaskDataTy &Data,
3242 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3243 ASTContext &C = CGF.getContext();
3244 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3245 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3246 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3247 ? OMPD_taskloop
3248 : OMPD_task;
3249 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3250 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3251 LValue SrcBase;
3252 bool IsTargetTask =
3253 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3254 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3255 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3256 // PointersArray, SizesArray, and MappersArray. The original variables for
3257 // these arrays are not captured and we get their addresses explicitly.
3258 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3259 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3260 SrcBase = CGF.MakeAddrLValue(
3261 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3262 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3263 CGF.ConvertTypeForMem(SharedsTy)),
3264 SharedsTy);
3265 }
3266 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3267 for (const PrivateDataTy &Pair : Privates) {
3268 // Do not initialize private locals.
3269 if (Pair.second.isLocalPrivate()) {
3270 ++FI;
3271 continue;
3272 }
3273 const VarDecl *VD = Pair.second.PrivateCopy;
3274 const Expr *Init = VD->getAnyInitializer();
3275 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3276 !CGF.isTrivialInitializer(Init)))) {
3277 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3278 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3279 const VarDecl *OriginalVD = Pair.second.Original;
3280 // Check if the variable is the target-based BasePointersArray,
3281 // PointersArray, SizesArray, or MappersArray.
3282 LValue SharedRefLValue;
3283 QualType Type = PrivateLValue.getType();
3284 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3285 if (IsTargetTask && !SharedField) {
3286 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3287 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3288 cast<CapturedDecl>(OriginalVD->getDeclContext())
3289 ->getNumParams() == 0 &&
3290 isa<TranslationUnitDecl>(
3291 cast<CapturedDecl>(OriginalVD->getDeclContext())
3292 ->getDeclContext()) &&
3293 "Expected artificial target data variable.");
3294 SharedRefLValue =
3295 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3296 } else if (ForDup) {
3297 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3298 SharedRefLValue = CGF.MakeAddrLValue(
3299 SharedRefLValue.getAddress().withAlignment(
3300 C.getDeclAlign(OriginalVD)),
3301 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3302 SharedRefLValue.getTBAAInfo());
3303 } else if (CGF.LambdaCaptureFields.count(
3304 Pair.second.Original->getCanonicalDecl()) > 0 ||
3305 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3306 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3307 } else {
3308 // Processing for implicitly captured variables.
3309 InlinedOpenMPRegionRAII Region(
3310 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3311 /*HasCancel=*/false, /*NoInheritance=*/true);
3312 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3313 }
3314 if (Type->isArrayType()) {
3315 // Initialize firstprivate array.
3316 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3317 // Perform simple memcpy.
3318 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3319 } else {
3320 // Initialize firstprivate array using element-by-element
3321 // initialization.
3322 CGF.EmitOMPAggregateAssign(
3323 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3324 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3325 Address SrcElement) {
3326 // Clean up any temporaries needed by the initialization.
3327 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3328 InitScope.addPrivate(Elem, SrcElement);
3329 (void)InitScope.Privatize();
3330 // Emit initialization for single element.
3331 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3332 CGF, &CapturesInfo);
3333 CGF.EmitAnyExprToMem(Init, DestElement,
3334 Init->getType().getQualifiers(),
3335 /*IsInitializer=*/false);
3336 });
3337 }
3338 } else {
3339 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3340 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3341 (void)InitScope.Privatize();
3342 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3343 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3344 /*capturedByInit=*/false);
3345 }
3346 } else {
3347 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3348 }
3349 }
3350 ++FI;
3351 }
3352}
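// As a rough model (illustrative) of what the loop above emits for a
// firstprivate scalar and a firstprivate array element:
//
//   tt->privates.a = task_src_shareds->a;                   // trivial copy
//   new (&tt->privates.arr[i]) T(task_src_shareds->arr[i]); // per element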
3353
3354/// Check if duplication function is required for taskloops.
3355static bool checkInitIsRequired(CodeGenFunction &CGF,
3356 ArrayRef<PrivateDataTy> Privates) {
3357 bool InitRequired = false;
3358 for (const PrivateDataTy &Pair : Privates) {
3359 if (Pair.second.isLocalPrivate())
3360 continue;
3361 const VarDecl *VD = Pair.second.PrivateCopy;
3362 const Expr *Init = VD->getAnyInitializer();
3363 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3364 !CGF.isTrivialInitializer(Init));
3365 if (InitRequired)
3366 break;
3367 }
3368 return InitRequired;
3369}
3370
3371
3372/// Emit task_dup function (for initialization of
3373/// private/firstprivate/lastprivate vars and last_iter flag)
3374/// \code
3375/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3376/// lastpriv) {
3377/// // setup lastprivate flag
3378/// task_dst->last = lastpriv;
3379/// // could be constructor calls here...
3380/// }
3381/// \endcode
3382static llvm::Value *
3383emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3384 const OMPExecutableDirective &D,
3385 QualType KmpTaskTWithPrivatesPtrQTy,
3386 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3387 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3388 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3389 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3390 ASTContext &C = CGM.getContext();
3391 FunctionArgList Args;
3392 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3393 KmpTaskTWithPrivatesPtrQTy,
3394 ImplicitParamKind::Other);
3395 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3396 KmpTaskTWithPrivatesPtrQTy,
3397 ImplicitParamKind::Other);
3398 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3399 ImplicitParamKind::Other);
3400 Args.push_back(&DstArg);
3401 Args.push_back(&SrcArg);
3402 Args.push_back(&LastprivArg);
3403 const auto &TaskDupFnInfo =
3404 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3405 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3406 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3407 auto *TaskDup = llvm::Function::Create(
3408 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3409 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3410 TaskDup->setDoesNotRecurse();
3411 CodeGenFunction CGF(CGM);
3412 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3413 Loc);
3414
3415 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3416 CGF.GetAddrOfLocalVar(&DstArg),
3417 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3418 // task_dst->liter = lastpriv;
3419 if (WithLastIter) {
3420 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3421 LValue Base = CGF.EmitLValueForField(
3422 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3423 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3424 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3425 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3426 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3427 }
3428
3429 // Emit initial values for private copies (if any).
3430 assert(!Privates.empty());
3431 Address KmpTaskSharedsPtr = Address::invalid();
3432 if (!Data.FirstprivateVars.empty()) {
3433 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3434 CGF.GetAddrOfLocalVar(&SrcArg),
3435 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3436 LValue Base = CGF.EmitLValueForField(
3437 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3438 KmpTaskSharedsPtr = Address(
3439 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3440 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3441 KmpTaskTShareds)),
3442 Loc),
3443 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3444 }
3445 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3446 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3447 CGF.FinishFunction();
3448 return TaskDup;
3449}
3450
3451/// Checks if destructor function is required to be generated.
3452/// \return true if cleanups are required, false otherwise.
3453static bool
3454checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3455 ArrayRef<PrivateDataTy> Privates) {
3456 for (const PrivateDataTy &P : Privates) {
3457 if (P.second.isLocalPrivate())
3458 continue;
3459 QualType Ty = P.second.Original->getType().getNonReferenceType();
3460 if (Ty.isDestructedType())
3461 return true;
3462 }
3463 return false;
3464}
3465
3466namespace {
3467/// Loop generator for OpenMP iterator expression.
3468class OMPIteratorGeneratorScope final
3469 : public CodeGenFunction::OMPPrivateScope {
3470 CodeGenFunction &CGF;
3471 const OMPIteratorExpr *E = nullptr;
3472 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3473 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3474 OMPIteratorGeneratorScope() = delete;
3475 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3476
3477public:
3478 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3479 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3480 if (!E)
3481 return;
3482 SmallVector<llvm::Value *, 4> Uppers;
3483 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3484 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3485 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3486 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3487 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3488 addPrivate(
3489 HelperData.CounterVD,
3490 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3491 }
3492 Privatize();
3493
3494 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3495 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3496 LValue CLVal =
3497 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3498 HelperData.CounterVD->getType());
3499 // Counter = 0;
3500 CGF.EmitStoreOfScalar(
3501 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3502 CLVal);
3503 CodeGenFunction::JumpDest &ContDest =
3504 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3505 CodeGenFunction::JumpDest &ExitDest =
3506 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3507 // N = <number-of-iterations>;
3508 llvm::Value *N = Uppers[I];
3509 // cont:
3510 // if (Counter < N) goto body; else goto exit;
3511 CGF.EmitBlock(ContDest.getBlock());
3512 auto *CVal =
3513 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3514 llvm::Value *Cmp =
3515 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3516 ? CGF.Builder.CreateICmpSLT(CVal, N)
3517 : CGF.Builder.CreateICmpULT(CVal, N);
3518 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3519 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3520 // body:
3521 CGF.EmitBlock(BodyBB);
3522 // Iteri = Begini + Counter * Stepi;
3523 CGF.EmitIgnoredExpr(HelperData.Update);
3524 }
3525 }
3526 ~OMPIteratorGeneratorScope() {
3527 if (!E)
3528 return;
3529 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3530 // Counter = Counter + 1;
3531 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3532 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3533 // goto cont;
3534 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3535 // exit:
3536 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3537 }
3538 }
3539};
3540} // namespace
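// Taken together, for iterator(it = begin:upper:step) the scope above
// brackets whatever is emitted inside it with, roughly (illustrative):
//
//   counter = 0;
// cont:
//   if (counter < N) goto body; else goto exit;  // N = number of iterations
// body:
//   it = begin + counter * step;
//   ...                        // code emitted while the scope is alive
//   counter = counter + 1; goto cont;
// exit: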
3541
3542static std::pair<llvm::Value *, llvm::Value *>
3543getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3544 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3545 llvm::Value *Addr;
3546 if (OASE) {
3547 const Expr *Base = OASE->getBase();
3548 Addr = CGF.EmitScalarExpr(Base);
3549 } else {
3550 Addr = CGF.EmitLValue(E).getPointer(CGF);
3551 }
3552 llvm::Value *SizeVal;
3553 QualType Ty = E->getType();
3554 if (OASE) {
3555 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3556 for (const Expr *SE : OASE->getDimensions()) {
3557 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3558 Sz = CGF.EmitScalarConversion(
3559 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3560 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3561 }
3562 } else if (const auto *ASE =
3563 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3564 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3565 Address UpAddrAddress = UpAddrLVal.getAddress();
3566 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3567 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3568 /*Idx0=*/1);
3569 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3570 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3571 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3572 } else {
3573 SizeVal = CGF.getTypeSize(Ty);
3574 }
3575 return std::make_pair(Addr, SizeVal);
3576}
3577
3578/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3579static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3580 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3581 if (KmpTaskAffinityInfoTy.isNull()) {
3582 RecordDecl *KmpAffinityInfoRD =
3583 C.buildImplicitRecord("kmp_task_affinity_info_t");
3584 KmpAffinityInfoRD->startDefinition();
3585 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3586 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3587 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3588 KmpAffinityInfoRD->completeDefinition();
3589 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3590 }
3591}
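// The implicit record mirrors the runtime's kmp_task_affinity_info_t from
// kmp.h; sketched (the runtime declares the flags word as bitfields, emitted
// here as a plain 32-bit unsigned integer):
//
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags;
//   };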
3592
3593CGOpenMPRuntime::TaskResultTy
3594CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3595 const OMPExecutableDirective &D,
3596 llvm::Function *TaskFunction, QualType SharedsTy,
3597 Address Shareds, const OMPTaskDataTy &Data) {
3598 ASTContext &C = CGM.getContext();
3599 llvm::SmallVector<PrivateDataTy, 4> Privates;
3600 // Aggregate privates and sort them by the alignment.
3601 const auto *I = Data.PrivateCopies.begin();
3602 for (const Expr *E : Data.PrivateVars) {
3603 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3604 Privates.emplace_back(
3605 C.getDeclAlign(VD),
3606 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3607 /*PrivateElemInit=*/nullptr));
3608 ++I;
3609 }
3610 I = Data.FirstprivateCopies.begin();
3611 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3612 for (const Expr *E : Data.FirstprivateVars) {
3613 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3614 Privates.emplace_back(
3615 C.getDeclAlign(VD),
3616 PrivateHelpersTy(
3617 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3618 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3619 ++I;
3620 ++IElemInitRef;
3621 }
3622 I = Data.LastprivateCopies.begin();
3623 for (const Expr *E : Data.LastprivateVars) {
3624 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3625 Privates.emplace_back(
3626 C.getDeclAlign(VD),
3627 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3628 /*PrivateElemInit=*/nullptr));
3629 ++I;
3630 }
3631 for (const VarDecl *VD : Data.PrivateLocals) {
3632 if (isAllocatableDecl(VD))
3633 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3634 else
3635 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3636 }
3637 llvm::stable_sort(Privates,
3638 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3639 return L.first > R.first;
3640 });
3641 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3642 // Build type kmp_routine_entry_t (if not built yet).
3643 emitKmpRoutineEntryT(KmpInt32Ty);
3644 // Build type kmp_task_t (if not built yet).
3645 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3646 if (SavedKmpTaskloopTQTy.isNull()) {
3647 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3648 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3649 }
3650 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3651 } else {
3652 assert((D.getDirectiveKind() == OMPD_task ||
3653 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3654 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3655 "Expected taskloop, task or target directive");
3656 if (SavedKmpTaskTQTy.isNull()) {
3657 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3658 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3659 }
3660 KmpTaskTQTy = SavedKmpTaskTQTy;
3661 }
3662 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3663 // Build particular struct kmp_task_t for the given task.
3664 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3665 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3666 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3667 QualType KmpTaskTWithPrivatesPtrQTy =
3668 C.getPointerType(KmpTaskTWithPrivatesQTy);
3669 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3670 llvm::Value *KmpTaskTWithPrivatesTySize =
3671 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3672 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3673
3674 // Emit initial values for private copies (if any).
3675 llvm::Value *TaskPrivatesMap = nullptr;
3676 llvm::Type *TaskPrivatesMapTy =
3677 std::next(TaskFunction->arg_begin(), 3)->getType();
3678 if (!Privates.empty()) {
3679 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3680 TaskPrivatesMap =
3681 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3682 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3683 TaskPrivatesMap, TaskPrivatesMapTy);
3684 } else {
3685 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3686 cast<llvm::PointerType>(TaskPrivatesMapTy));
3687 }
3688 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3689 // kmp_task_t *tt);
3690 llvm::Function *TaskEntry = emitProxyTaskFunction(
3691 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3692 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3693 TaskPrivatesMap);
3694
3695 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3696 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3697 // kmp_routine_entry_t *task_entry);
3698 // Task flags. Format is taken from
3699 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3700 // description of kmp_tasking_flags struct.
3701 enum {
3702 TiedFlag = 0x1,
3703 FinalFlag = 0x2,
3704 DestructorsFlag = 0x8,
3705 PriorityFlag = 0x20,
3706 DetachableFlag = 0x40,
3707 };
3708 unsigned Flags = Data.Tied ? TiedFlag : 0;
3709 bool NeedsCleanup = false;
3710 if (!Privates.empty()) {
3711 NeedsCleanup =
3712 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3713 if (NeedsCleanup)
3714 Flags = Flags | DestructorsFlag;
3715 }
3716 if (Data.Priority.getInt())
3717 Flags = Flags | PriorityFlag;
3718 if (D.hasClausesOfKind<OMPDetachClause>())
3719 Flags = Flags | DetachableFlag;
3720 llvm::Value *TaskFlags =
3721 Data.Final.getPointer()
3722 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3723 CGF.Builder.getInt32(FinalFlag),
3724 CGF.Builder.getInt32(/*C=*/0))
3725 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3726 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3727 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3728 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3729 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3730 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3731 TaskEntry, KmpRoutineEntryPtrTy)};
3732 llvm::Value *NewTask;
3733 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3734 // Check if we have any device clause associated with the directive.
3735 const Expr *Device = nullptr;
3736 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3737 Device = C->getDevice();
3738 // Emit device ID if any, otherwise use the default value.
3739 llvm::Value *DeviceID;
3740 if (Device)
3741 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3742 CGF.Int64Ty, /*isSigned=*/true);
3743 else
3744 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3745 AllocArgs.push_back(DeviceID);
3746 NewTask = CGF.EmitRuntimeCall(
3747 OMPBuilder.getOrCreateRuntimeFunction(
3748 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3749 AllocArgs);
3750 } else {
3751 NewTask =
3752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3753 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3754 AllocArgs);
3755 }
3756 // Emit detach clause initialization.
3757 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3758 // task_descriptor);
3759 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3760 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3761 LValue EvtLVal = CGF.EmitLValue(Evt);
3762
3763 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3764 // int gtid, kmp_task_t *task);
3765 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3766 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3767 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3768 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3769 OMPBuilder.getOrCreateRuntimeFunction(
3770 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3771 {Loc, Tid, NewTask});
3772 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3773 Evt->getExprLoc());
3774 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3775 }
3776 // Process affinity clauses.
3777 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3778 // Process list of affinity data.
3779 ASTContext &C = CGM.getContext();
3780 Address AffinitiesArray = Address::invalid();
3781 // Calculate number of elements to form the array of affinity data.
3782 llvm::Value *NumOfElements = nullptr;
3783 unsigned NumAffinities = 0;
3784 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3785 if (const Expr *Modifier = C->getModifier()) {
3786 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3787 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3788 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3789 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3790 NumOfElements =
3791 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3792 }
3793 } else {
3794 NumAffinities += C->varlist_size();
3795 }
3796 }
3797 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3798 // Field ids in the kmp_task_affinity_info record.
3799 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3800
3801 QualType KmpTaskAffinityInfoArrayTy;
3802 if (NumOfElements) {
3803 NumOfElements = CGF.Builder.CreateNUWAdd(
3804 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3805 auto *OVE = new (C) OpaqueValueExpr(
3806 Loc,
3807 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3808 VK_PRValue);
3809 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3810 RValue::get(NumOfElements));
3811 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3812 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3813 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3814 // Properly emit variable-sized array.
3815 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3816 ImplicitParamKind::Other);
3817 CGF.EmitVarDecl(*PD);
3818 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3819 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3820 /*isSigned=*/false);
3821 } else {
3822 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3823 KmpTaskAffinityInfoTy,
3824 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3825 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3826 AffinitiesArray =
3827 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3828 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3829 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3830 /*isSigned=*/false);
3831 }
3832
3833 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3834 // Fill array by elements without iterators.
3835 unsigned Pos = 0;
3836 bool HasIterator = false;
3837 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3838 if (C->getModifier()) {
3839 HasIterator = true;
3840 continue;
3841 }
3842 for (const Expr *E : C->varlist()) {
3843 llvm::Value *Addr;
3844 llvm::Value *Size;
3845 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3846 LValue Base =
3847 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3848 KmpTaskAffinityInfoTy);
3849 // affs[i].base_addr = &<Affinities[i].second>;
3850 LValue BaseAddrLVal = CGF.EmitLValueForField(
3851 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3852 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3853 BaseAddrLVal);
3854 // affs[i].len = sizeof(<Affinities[i].second>);
3855 LValue LenLVal = CGF.EmitLValueForField(
3856 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3857 CGF.EmitStoreOfScalar(Size, LenLVal);
3858 ++Pos;
3859 }
3860 }
3861 LValue PosLVal;
3862 if (HasIterator) {
3863 PosLVal = CGF.MakeAddrLValue(
3864 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3865 C.getSizeType());
3866 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3867 }
3868 // Process elements with iterators.
3869 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3870 const Expr *Modifier = C->getModifier();
3871 if (!Modifier)
3872 continue;
3873 OMPIteratorGeneratorScope IteratorScope(
3874 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3875 for (const Expr *E : C->varlist()) {
3876 llvm::Value *Addr;
3877 llvm::Value *Size;
3878 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3879 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3880 LValue Base =
3881 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3882 KmpTaskAffinityInfoTy);
3883 // affs[i].base_addr = &<Affinities[i].second>;
3884 LValue BaseAddrLVal = CGF.EmitLValueForField(
3885 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3886 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3887 BaseAddrLVal);
3888 // affs[i].len = sizeof(<Affinities[i].second>);
3889 LValue LenLVal = CGF.EmitLValueForField(
3890 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3891 CGF.EmitStoreOfScalar(Size, LenLVal);
3892 Idx = CGF.Builder.CreateNUWAdd(
3893 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3894 CGF.EmitStoreOfScalar(Idx, PosLVal);
3895 }
3896 }
3897 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3898 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3899 // naffins, kmp_task_affinity_info_t *affin_list);
3900 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3901 llvm::Value *GTid = getThreadID(CGF, Loc);
3902 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3903 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3904 // FIXME: Emit the function and ignore its result for now unless the
3905 // runtime function is properly implemented.
3906 (void)CGF.EmitRuntimeCall(
3907 OMPBuilder.getOrCreateRuntimeFunction(
3908 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3909 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3910 }
3911 llvm::Value *NewTaskNewTaskTTy =
3912 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3913 NewTask, KmpTaskTWithPrivatesPtrTy);
3914 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3915 KmpTaskTWithPrivatesQTy);
3916 LValue TDBase =
3917 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3918 // Fill the data in the resulting kmp_task_t record.
3919 // Copy shareds if there are any.
3920 Address KmpTaskSharedsPtr = Address::invalid();
3921 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3922 KmpTaskSharedsPtr = Address(
3923 CGF.EmitLoadOfScalar(
3924 CGF.EmitLValueForField(
3925 TDBase,
3926 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3927 Loc),
3928 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3929 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3930 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3931 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3932 }
3933 // Emit initial values for private copies (if any).
3934 TaskResultTy Result;
3935 if (!Privates.empty()) {
3936 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3937 SharedsTy, SharedsPtrTy, Data, Privates,
3938 /*ForDup=*/false);
3939 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3940 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3941 Result.TaskDupFn = emitTaskDupFunction(
3942 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3943 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3944 /*WithLastIter=*/!Data.LastprivateVars.empty());
3945 }
3946 }
3947 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3948 enum { Priority = 0, Destructors = 1 };
3949 // Provide pointer to function with destructors for privates.
3950 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3951 const RecordDecl *KmpCmplrdataUD =
3952 (*FI)->getType()->getAsUnionType()->getDecl();
3953 if (NeedsCleanup) {
3954 llvm::Value *DestructorFn = emitDestructorsFunction(
3955 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3956 KmpTaskTWithPrivatesQTy);
3957 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3958 LValue DestructorsLV = CGF.EmitLValueForField(
3959 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3960 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3961 DestructorFn, KmpRoutineEntryPtrTy),
3962 DestructorsLV);
3963 }
3964 // Set priority.
3965 if (Data.Priority.getInt()) {
3966 LValue Data2LV = CGF.EmitLValueForField(
3967 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3968 LValue PriorityLV = CGF.EmitLValueForField(
3969 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3970 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3971 }
3972 Result.NewTask = NewTask;
3973 Result.TaskEntry = TaskEntry;
3974 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3975 Result.TDBase = TDBase;
3976 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3977 return Result;
3978}
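// Summarizing the allocation path above (illustrative):
//
//   kmp_task_t *new_task = __kmpc_omp_task_alloc(
//       loc, gtid, flags, sizeof(kmp_task_t_with_privates), sizeof(shareds),
//       &.omp_task_entry.);
//
// __kmpc_omp_target_task_alloc is called instead, with a trailing device id,
// when a target directive carries a nowait clause.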
3979
3980/// Translates internal dependency kind into the runtime kind.
3981static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3982 RTLDependenceKindTy DepKind;
3983 switch (K) {
3984 case OMPC_DEPEND_in:
3985 DepKind = RTLDependenceKindTy::DepIn;
3986 break;
3987 // Out and InOut dependencies must use the same code.
3988 case OMPC_DEPEND_out:
3989 case OMPC_DEPEND_inout:
3990 DepKind = RTLDependenceKindTy::DepInOut;
3991 break;
3992 case OMPC_DEPEND_mutexinoutset:
3993 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3994 break;
3995 case OMPC_DEPEND_inoutset:
3996 DepKind = RTLDependenceKindTy::DepInOutSet;
3997 break;
3998 case OMPC_DEPEND_outallmemory:
3999 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4000 break;
4001 case OMPC_DEPEND_source:
4002 case OMPC_DEPEND_sink:
4003 case OMPC_DEPEND_depobj:
4004 case OMPC_DEPEND_inoutallmemory:
4005 case OMPC_DEPEND_unknown:
4006 llvm_unreachable("Unknown task dependence type");
4007 }
4008 return DepKind;
4009}
4010
4011/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4012static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4013 QualType &FlagsTy) {
4014 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4015 if (KmpDependInfoTy.isNull()) {
4016 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4017 KmpDependInfoRD->startDefinition();
4018 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4019 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4020 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4021 KmpDependInfoRD->completeDefinition();
4022 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4023 }
4024}
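// This matches the runtime's kmp_depend_info from kmp.h, roughly:
//
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     uint8_t flags;  // in/out/mutexinoutset/... kind bits
//   };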
4025
4026std::pair<llvm::Value *, LValue>
4027CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4028 SourceLocation Loc) {
4029 ASTContext &C = CGM.getContext();
4030 QualType FlagsTy;
4031 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4032 RecordDecl *KmpDependInfoRD =
4033 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4034 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4035 LValue Base = CGF.EmitLoadOfPointerLValue(
4036 DepobjLVal.getAddress().withElementType(
4037 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4038 KmpDependInfoPtrTy->castAs<PointerType>());
4039 Address DepObjAddr = CGF.Builder.CreateGEP(
4040 CGF, Base.getAddress(),
4041 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4042 LValue NumDepsBase = CGF.MakeAddrLValue(
4043 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4044 // NumDeps = deps[i].base_addr;
4045 LValue BaseAddrLVal = CGF.EmitLValueForField(
4046 NumDepsBase,
4047 *std::next(KmpDependInfoRD->field_begin(),
4048 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4049 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4050 return std::make_pair(NumDeps, Base);
4051}
4052
4053static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4054 llvm::PointerUnion<unsigned *, LValue *> Pos,
4055 const OMPTaskDataTy::DependData &Data,
4056 Address DependenciesArray) {
4057 CodeGenModule &CGM = CGF.CGM;
4058 ASTContext &C = CGM.getContext();
4059 QualType FlagsTy;
4060 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4061 RecordDecl *KmpDependInfoRD =
4062 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4063 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4064
4065 OMPIteratorGeneratorScope IteratorScope(
4066 CGF, cast_or_null<OMPIteratorExpr>(
4067 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4068 : nullptr));
4069 for (const Expr *E : Data.DepExprs) {
4070 llvm::Value *Addr;
4071 llvm::Value *Size;
4072
4073 // The expression will be a nullptr in the 'omp_all_memory' case.
4074 if (E) {
4075 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4076 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4077 } else {
4078 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4079 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4080 }
4081 LValue Base;
4082 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4083 Base = CGF.MakeAddrLValue(
4084 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4085 } else {
4086 assert(E && "Expected a non-null expression");
4087 LValue &PosLVal = *cast<LValue *>(Pos);
4088 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4089 Base = CGF.MakeAddrLValue(
4090 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4091 }
4092 // deps[i].base_addr = &<Dependencies[i].second>;
4093 LValue BaseAddrLVal = CGF.EmitLValueForField(
4094 Base,
4095 *std::next(KmpDependInfoRD->field_begin(),
4096 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4097 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4098 // deps[i].len = sizeof(<Dependencies[i].second>);
4099 LValue LenLVal = CGF.EmitLValueForField(
4100 Base, *std::next(KmpDependInfoRD->field_begin(),
4101 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4102 CGF.EmitStoreOfScalar(Size, LenLVal);
4103 // deps[i].flags = <Dependencies[i].first>;
4104 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4105 LValue FlagsLVal = CGF.EmitLValueForField(
4106 Base,
4107 *std::next(KmpDependInfoRD->field_begin(),
4108 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4109 CGF.EmitStoreOfScalar(
4110 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4111 FlagsLVal);
4112 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4113 ++(*P);
4114 } else {
4115 LValue &PosLVal = *cast<LValue *>(Pos);
4116 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4117 Idx = CGF.Builder.CreateNUWAdd(Idx,
4118 llvm::ConstantInt::get(Idx->getType(), 1));
4119 CGF.EmitStoreOfScalar(Idx, PosLVal);
4120 }
4121 }
4122}
4123
4124static SmallVector<llvm::Value *, 4> emitDepobjElementsSizes(
4125 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4127 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4128 "Expected depobj dependency kind.");
4129 SmallVector<llvm::Value *, 4> Sizes;
4130 SmallVector<LValue, 4> SizeLVals;
4131 ASTContext &C = CGF.getContext();
4132 {
4133 OMPIteratorGeneratorScope IteratorScope(
4134 CGF, cast_or_null<OMPIteratorExpr>(
4135 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4136 : nullptr));
4137 for (const Expr *E : Data.DepExprs) {
4138 llvm::Value *NumDeps;
4139 LValue Base;
4140 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4141 std::tie(NumDeps, Base) =
4142 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4143 LValue NumLVal = CGF.MakeAddrLValue(
4144 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4145 C.getUIntPtrType());
4146 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4147 NumLVal.getAddress());
4148 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4149 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4150 CGF.EmitStoreOfScalar(Add, NumLVal);
4151 SizeLVals.push_back(NumLVal);
4152 }
4153 }
4154 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4155 llvm::Value *Size =
4156 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4157 Sizes.push_back(Size);
4158 }
4159 return Sizes;
4160}
4161
4162static void emitDepobjElements(CodeGenFunction &CGF,
4163 QualType &KmpDependInfoTy,
4164 LValue PosLVal,
4165 const OMPTaskDataTy::DependData &Data,
4166 Address DependenciesArray) {
4167 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4168 "Expected depobj dependency kind.");
4169 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4170 {
4171 OMPIteratorGeneratorScope IteratorScope(
4172 CGF, cast_or_null<OMPIteratorExpr>(
4173 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4174 : nullptr));
4175 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4176 const Expr *E = Data.DepExprs[I];
4177 llvm::Value *NumDeps;
4178 LValue Base;
4179 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4180 std::tie(NumDeps, Base) =
4181 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4182
4183 // memcpy the dependency data.
4184 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4185 ElSize,
4186 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4187 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4188 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4189 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4190
4191 // Increase pos.
4192 // pos += numDeps;
4193 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4194 CGF.EmitStoreOfScalar(Add, PosLVal);
4195 }
4196 }
4197}
4198
4199std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4200 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4201 SourceLocation Loc) {
4202 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4203 return D.DepExprs.empty();
4204 }))
4205 return std::make_pair(nullptr, Address::invalid());
4206 // Process list of dependencies.
4207 ASTContext &C = CGM.getContext();
4208 Address DependenciesArray = Address::invalid();
4209 llvm::Value *NumOfElements = nullptr;
4210 unsigned NumDependencies = std::accumulate(
4211 Dependencies.begin(), Dependencies.end(), 0,
4212 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4213 return D.DepKind == OMPC_DEPEND_depobj
4214 ? V
4215 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4216 });
4217 QualType FlagsTy;
4218 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4219 bool HasDepobjDeps = false;
4220 bool HasRegularWithIterators = false;
4221 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4222 llvm::Value *NumOfRegularWithIterators =
4223 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4224 // Calculate number of depobj dependencies and regular deps with the
4225 // iterators.
4226 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4227 if (D.DepKind == OMPC_DEPEND_depobj) {
4228 SmallVector<llvm::Value *, 4> Sizes =
4229 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4230 for (llvm::Value *Size : Sizes) {
4231 NumOfDepobjElements =
4232 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4233 }
4234 HasDepobjDeps = true;
4235 continue;
4236 }
4237 // Include number of iterations, if any.
4238
4239 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4240 llvm::Value *ClauseIteratorSpace =
4241 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4242 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4243 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4244 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4245 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4246 }
4247 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4248 ClauseIteratorSpace,
4249 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4250 NumOfRegularWithIterators =
4251 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4252 HasRegularWithIterators = true;
4253 continue;
4254 }
4255 }
4256
4257 QualType KmpDependInfoArrayTy;
4258 if (HasDepobjDeps || HasRegularWithIterators) {
4259 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4260 /*isSigned=*/false);
4261 if (HasDepobjDeps) {
4262 NumOfElements =
4263 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4264 }
4265 if (HasRegularWithIterators) {
4266 NumOfElements =
4267 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4268 }
4269 auto *OVE = new (C) OpaqueValueExpr(
4270 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4271 VK_PRValue);
4272 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4273 RValue::get(NumOfElements));
4274 KmpDependInfoArrayTy =
4275 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4276 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4277 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4278 // Properly emit variable-sized array.
4279 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4280 ImplicitParamKind::Other);
4281 CGF.EmitVarDecl(*PD);
4282 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4283 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4284 /*isSigned=*/false);
4285 } else {
4286 KmpDependInfoArrayTy = C.getConstantArrayType(
4287 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4288 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4289 DependenciesArray =
4290 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4291 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4292 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4293 /*isSigned=*/false);
4294 }
4295 unsigned Pos = 0;
4296 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4297 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4298 Dependencies[I].IteratorExpr)
4299 continue;
4300 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4301 DependenciesArray);
4302 }
4303 // Copy regular dependencies with iterators.
4304 LValue PosLVal = CGF.MakeAddrLValue(
4305 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4306 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4307 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4308 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4309 !Dependencies[I].IteratorExpr)
4310 continue;
4311 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4312 DependenciesArray);
4313 }
4314 // Copy final depobj arrays without iterators.
4315 if (HasDepobjDeps) {
4316 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4317 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4318 continue;
4319 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4320 DependenciesArray);
4321 }
4322 }
4323 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4324 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4325 return std::make_pair(NumOfElements, DependenciesArray);
4326}
4327
4328Address CGOpenMPRuntime::emitDepobjDependClause(
4329 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4330 SourceLocation Loc) {
4331 if (Dependencies.DepExprs.empty())
4332 return Address::invalid();
4333 // Process list of dependencies.
4334 ASTContext &C = CGM.getContext();
4335 Address DependenciesArray = Address::invalid();
4336 unsigned NumDependencies = Dependencies.DepExprs.size();
4337 QualType FlagsTy;
4338 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4339 RecordDecl *KmpDependInfoRD =
4340 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4341
4342 llvm::Value *Size;
4343 // Define type kmp_depend_info[<Dependencies.size()>];
4344 // For depobj reserve one extra element to store the number of elements.
4345 // It is required to handle depobj(x) update(in) construct.
4346 // kmp_depend_info[<Dependencies.size()>] deps;
4347 llvm::Value *NumDepsVal;
4348 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4349 if (const auto *IE =
4350 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4351 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4352 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4353 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4354 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4355 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4356 }
4357 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4358 NumDepsVal);
4359 CharUnits SizeInBytes =
4360 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4361 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4362 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4363 NumDepsVal =
4364 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4365 } else {
4366 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4367 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4368 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4369 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4370 Size = CGM.getSize(Sz.alignTo(Align));
4371 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4372 }
4373 // Need to allocate on the dynamic memory.
4374 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4375 // Use default allocator.
4376 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4377 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4378
4379 llvm::Value *Addr =
4380 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4381 CGM.getModule(), OMPRTL___kmpc_alloc),
4382 Args, ".dep.arr.addr");
4383 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4384 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4385 Addr, CGF.Builder.getPtrTy(0));
4386 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4387 // Write number of elements in the first element of array for depobj.
4388 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4389 // deps[i].base_addr = NumDependencies;
4390 LValue BaseAddrLVal = CGF.EmitLValueForField(
4391 Base,
4392 *std::next(KmpDependInfoRD->field_begin(),
4393 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4394 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4395 llvm::PointerUnion<unsigned *, LValue *> Pos;
4396 unsigned Idx = 1;
4397 LValue PosLVal;
4398 if (Dependencies.IteratorExpr) {
4399 PosLVal = CGF.MakeAddrLValue(
4400 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4401 C.getSizeType());
4402 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4403 /*IsInit=*/true);
4404 Pos = &PosLVal;
4405 } else {
4406 Pos = &Idx;
4407 }
4408 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4409 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4410 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4411 CGF.Int8Ty);
4412 return DependenciesArray;
4413}
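// Resulting depobj layout (illustrative): element 0 is a header whose
// base_addr field stores the dependency count, and the address returned to
// the caller points at element 1:
//
//   deps[0].base_addr = <numDeps>;  // header used by depobj update/destroy
//   deps[1 .. numDeps]              // the actual kmp_depend_info records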
4414
4415void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4416 SourceLocation Loc) {
4417 ASTContext &C = CGM.getContext();
4418 QualType FlagsTy;
4419 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4420 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4421 C.VoidPtrTy.castAs<PointerType>());
4422 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4423 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4424 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4425 CGF.ConvertTypeForMem(KmpDependInfoTy));
4426 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4427 Addr.getElementType(), Addr.emitRawPointer(CGF),
4428 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4429 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4430 CGF.VoidPtrTy);
4431 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4432 // Use default allocator.
4433 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4434 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4435
4436 // __kmpc_free(gtid, addr, nullptr);
4437 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4438 CGM.getModule(), OMPRTL___kmpc_free),
4439 Args);
4440}
4441
4442void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4443 OpenMPDependClauseKind NewDepKind,
4444 SourceLocation Loc) {
4445 ASTContext &C = CGM.getContext();
4446 QualType FlagsTy;
4447 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4448 RecordDecl *KmpDependInfoRD =
4449 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4450 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4451 llvm::Value *NumDeps;
4452 LValue Base;
4453 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4454
4455 Address Begin = Base.getAddress();
4456 // Cast from pointer to array type to pointer to single element.
4457 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4458 Begin.emitRawPointer(CGF), NumDeps);
4459 // The basic structure here is a while-do loop.
4460 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4461 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4462 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4463 CGF.EmitBlock(BodyBB);
4464 llvm::PHINode *ElementPHI =
4465 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4466 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4467 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4468 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4469 Base.getTBAAInfo());
4470 // deps[i].flags = NewDepKind;
4471 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4472 LValue FlagsLVal = CGF.EmitLValueForField(
4473 Base, *std::next(KmpDependInfoRD->field_begin(),
4474 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4475 CGF.EmitStoreOfScalar(
4476 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4477 FlagsLVal);
4478
4479 // Shift the address forward by one element.
4480 llvm::Value *ElementNext =
4481 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4482 .emitRawPointer(CGF);
4483 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4484 llvm::Value *IsEmpty =
4485 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4486 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4487 // Done.
4488 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4489}
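// The PHI-based loop built above is equivalent to (illustrative):
//
//   for (kmp_depend_info *el = base, *end = base + numDeps; el != end; ++el)
//     el->flags = <new dependence kind>;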
4490
4491void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4492 const OMPExecutableDirective &D,
4493 llvm::Function *TaskFunction,
4494 QualType SharedsTy, Address Shareds,
4495 const Expr *IfCond,
4496 const OMPTaskDataTy &Data) {
4497 if (!CGF.HaveInsertPoint())
4498 return;
4499
4500 TaskResultTy Result =
4501 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4502 llvm::Value *NewTask = Result.NewTask;
4503 llvm::Function *TaskEntry = Result.TaskEntry;
4504 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4505 LValue TDBase = Result.TDBase;
4506 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4507 // Process list of dependences.
4508 Address DependenciesArray = Address::invalid();
4509 llvm::Value *NumOfElements;
4510 std::tie(NumOfElements, DependenciesArray) =
4511 emitDependClause(CGF, Data.Dependences, Loc);
4512
4513 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4514 // libcall.
4515 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4516 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4517 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4518 // list is not empty
4519 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4520 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4521 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4522 llvm::Value *DepTaskArgs[7];
4523 if (!Data.Dependences.empty()) {
4524 DepTaskArgs[0] = UpLoc;
4525 DepTaskArgs[1] = ThreadID;
4526 DepTaskArgs[2] = NewTask;
4527 DepTaskArgs[3] = NumOfElements;
4528 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4529 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4530 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4531 }
4532 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4533 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4534 if (!Data.Tied) {
4535 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4536 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4537 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4538 }
4539 if (!Data.Dependences.empty()) {
4540 CGF.EmitRuntimeCall(
4541 OMPBuilder.getOrCreateRuntimeFunction(
4542 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4543 DepTaskArgs);
4544 } else {
4545 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4546 CGM.getModule(), OMPRTL___kmpc_omp_task),
4547 TaskArgs);
4548 }
4549 // Check if the parent region is untied and build a return for the untied task.
4550 if (auto *Region =
4551 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4552 Region->emitUntiedSwitch(CGF);
4553 };
4554
4555 llvm::Value *DepWaitTaskArgs[7];
4556 if (!Data.Dependences.empty()) {
4557 DepWaitTaskArgs[0] = UpLoc;
4558 DepWaitTaskArgs[1] = ThreadID;
4559 DepWaitTaskArgs[2] = NumOfElements;
4560 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4561 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4562 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4563 DepWaitTaskArgs[6] =
4564 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4565 }
4566 auto &M = CGM.getModule();
4567 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4568 TaskEntry, &Data, &DepWaitTaskArgs,
4569 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4570 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4571 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4572 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4573 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
4574 // kmp_int32 has_no_wait); if dependence info is specified.
4575 if (!Data.Dependences.empty())
4576 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4577 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4578 DepWaitTaskArgs);
4579 // Call proxy_task_entry(gtid, new_task);
4580 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4581 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4582 Action.Enter(CGF);
4583 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4584 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4585 OutlinedFnArgs);
4586 };
4587
4588 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4589 // kmp_task_t *new_task);
4590 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4591 // kmp_task_t *new_task);
4592 RegionCodeGenTy RCG(CodeGen);
4593 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4594 M, OMPRTL___kmpc_omp_task_begin_if0),
4595 TaskArgs,
4596 OMPBuilder.getOrCreateRuntimeFunction(
4597 M, OMPRTL___kmpc_omp_task_complete_if0),
4598 TaskArgs);
4599 RCG.setAction(Action);
4600 RCG(CGF);
4601 };
4602
4603 if (IfCond) {
4604 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4605 } else {
4606 RegionCodeGenTy ThenRCG(ThenCodeGen);
4607 ThenRCG(CGF);
4608 }
4609}
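// Sketch of the if-clause lowering performed above: for
//   #pragma omp task if(cond)
// the emitted control flow is roughly
//   if (cond) {
//     __kmpc_omp_task(loc, gtid, new_task);
//   } else { // undeferred execution on the encountering thread
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }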
4610
4611void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4612 const OMPLoopDirective &D,
4613 llvm::Function *TaskFunction,
4614 QualType SharedsTy, Address Shareds,
4615 const Expr *IfCond,
4616 const OMPTaskDataTy &Data) {
4617 if (!CGF.HaveInsertPoint())
4618 return;
4619 TaskResultTy Result =
4620 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4621 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4622 // libcall.
4623 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4624 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4625 // sched, kmp_uint64 grainsize, void *task_dup);
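// For example, '#pragma omp taskloop grainsize(4)' is lowered with
// sched == Grainsize and a grainsize operand of 4, 'num_tasks(8)' selects
// sched == NumTasks, and the absence of both clauses yields NoSchedule.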
4626 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4627 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4628 llvm::Value *IfVal;
4629 if (IfCond) {
4630 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4631 /*isSigned=*/true);
4632 } else {
4633 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4634 }
4635
4636 LValue LBLVal = CGF.EmitLValueForField(
4637 Result.TDBase,
4638 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4639 const auto *LBVar =
4640 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4641 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4642 /*IsInitializer=*/true);
4643 LValue UBLVal = CGF.EmitLValueForField(
4644 Result.TDBase,
4645 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4646 const auto *UBVar =
4647 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4648 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4649 /*IsInitializer=*/true);
4650 LValue StLVal = CGF.EmitLValueForField(
4651 Result.TDBase,
4652 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4653 const auto *StVar =
4654 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4655 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4656 /*IsInitializer=*/true);
4657 // Store reductions address.
4658 LValue RedLVal = CGF.EmitLValueForField(
4659 Result.TDBase,
4660 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4661 if (Data.Reductions) {
4662 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4663 } else {
4664 CGF.EmitNullInitialization(RedLVal.getAddress(),
4665 CGF.getContext().VoidPtrTy);
4666 }
4667 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4668 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4669 UpLoc,
4670 ThreadID,
4671 Result.NewTask,
4672 IfVal,
4673 LBLVal.getPointer(CGF),
4674 UBLVal.getPointer(CGF),
4675 CGF.EmitLoadOfScalar(StLVal, Loc),
4676 llvm::ConstantInt::getSigned(
4677 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4678 llvm::ConstantInt::getSigned(
4679 CGF.IntTy, Data.Schedule.getPointer()
4680 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4681 : NoSchedule),
4682 Data.Schedule.getPointer()
4683 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4684 /*isSigned=*/false)
4685 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4686 if (Data.HasModifier)
4687 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4688
4689 TaskArgs.push_back(Result.TaskDupFn
4690 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4691 Result.TaskDupFn, CGF.VoidPtrTy)
4692 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4693 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4694 CGM.getModule(), Data.HasModifier
4695 ? OMPRTL___kmpc_taskloop_5
4696 : OMPRTL___kmpc_taskloop),
4697 TaskArgs);
4698}
4699
4700/// Emit reduction operation for each element of array (required for
4701/// array sections) LHS op = RHS.
4702/// \param Type Type of array.
4703/// \param LHSVar Variable on the left side of the reduction operation
4704/// (references element of array in original variable).
4705/// \param RHSVar Variable on the right side of the reduction operation
4706/// (references element of array in original variable).
4707/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4708/// RHSVar.
4709static void EmitOMPAggregateReduction(
4710 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4711 const VarDecl *RHSVar,
4712 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4713 const Expr *, const Expr *)> &RedOpGen,
4714 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4715 const Expr *UpExpr = nullptr) {
4716 // Perform the reduction element by element.
4717 QualType ElementTy;
4718 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4719 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4720
4721 // Drill down to the base element type on both arrays.
4722 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4723 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4724
4725 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4726 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4727 // Cast from pointer to array type to pointer to single element.
4728 llvm::Value *LHSEnd =
4729 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4730 // The basic structure here is a while-do loop.
4731 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4732 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4733 llvm::Value *IsEmpty =
4734 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4735 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4736
4737 // Enter the loop body, making that address the current address.
4738 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4739 CGF.EmitBlock(BodyBB);
4740
4741 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4742
4743 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4744 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4745 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4746 Address RHSElementCurrent(
4747 RHSElementPHI, RHSAddr.getElementType(),
4748 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4749
4750 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4751 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4752 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4753 Address LHSElementCurrent(
4754 LHSElementPHI, LHSAddr.getElementType(),
4755 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4756
4757 // Emit copy.
4758 CodeGenFunction::OMPPrivateScope Scope(CGF);
4759 Scope.addPrivate(LHSVar, LHSElementCurrent);
4760 Scope.addPrivate(RHSVar, RHSElementCurrent);
4761 Scope.Privatize();
4762 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4763 Scope.ForceCleanup();
4764
4765 // Shift the address forward by one element.
4766 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4767 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4768 "omp.arraycpy.dest.element");
4769 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4770 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4771 "omp.arraycpy.src.element");
4772 // Check whether we've reached the end.
4773 llvm::Value *Done =
4774 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4775 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4776 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4777 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4778
4779 // Done.
4780 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4781}
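// As an illustration, an array-section reduction such as
//   #pragma omp parallel for reduction(+ : a[0:n])
// reaches this helper, and the loop above applies the combiner element by
// element, roughly:
//   for (size_t i = 0; i != n; ++i)
//     lhs[i] = lhs[i] + rhs[i];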
4782
4783/// Emit reduction combiner. If the combiner is a simple expression emit it as
4784/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4785/// UDR combiner function.
4786static void emitReductionCombiner(CodeGenFunction &CGF,
4787 const Expr *ReductionOp) {
4788 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4789 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4790 if (const auto *DRE =
4791 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4792 if (const auto *DRD =
4793 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4794 std::pair<llvm::Function *, llvm::Function *> Reduction =
4795 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4796 RValue Func = RValue::get(Reduction.first);
4797 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4798 CGF.EmitIgnoredExpr(ReductionOp);
4799 return;
4800 }
4801 CGF.EmitIgnoredExpr(ReductionOp);
4802}
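// The UDR case above fires for user-defined reductions, e.g. (a sketch, with
// 'combine' standing in for an arbitrary user combiner):
//   #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
//   #pragma omp parallel for reduction(merge : v)
// Here the reduction op is a call through an OpaqueValueExpr, and the
// registered UDR combiner function is substituted as the callee.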
4803
4804llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4805 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4806 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4807 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4808 ASTContext &C = CGM.getContext();
4809
4810 // void reduction_func(void *LHSArg, void *RHSArg);
4811 FunctionArgList Args;
4812 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4813 ImplicitParamKind::Other);
4814 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4815 ImplicitParamKind::Other);
4816 Args.push_back(&LHSArg);
4817 Args.push_back(&RHSArg);
4818 const auto &CGFI =
4819 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4820 std::string Name = getReductionFuncName(ReducerName);
4821 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4822 llvm::GlobalValue::InternalLinkage, Name,
4823 &CGM.getModule());
4824 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4825 Fn->setDoesNotRecurse();
4826 CodeGenFunction CGF(CGM);
4827 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4828
4829 // Dst = (void*[n])(LHSArg);
4830 // Src = (void*[n])(RHSArg);
4831 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4833 CGF.Builder.getPtrTy(0)),
4834 ArgsElemType, CGF.getPointerAlign());
4835 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4836 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4837 CGF.Builder.getPtrTy(0)),
4838 ArgsElemType, CGF.getPointerAlign());
4839
4840 // ...
4841 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4842 // ...
4843 CodeGenFunction::OMPPrivateScope Scope(CGF);
4844 const auto *IPriv = Privates.begin();
4845 unsigned Idx = 0;
4846 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4847 const auto *RHSVar =
4848 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4849 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4850 const auto *LHSVar =
4851 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4852 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4853 QualType PrivTy = (*IPriv)->getType();
4854 if (PrivTy->isVariablyModifiedType()) {
4855 // Get array size and emit VLA type.
4856 ++Idx;
4857 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4858 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4859 const VariableArrayType *VLA =
4860 CGF.getContext().getAsVariableArrayType(PrivTy);
4861 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4862 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4863 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4864 CGF.EmitVariablyModifiedType(PrivTy);
4865 }
4866 }
4867 Scope.Privatize();
4868 IPriv = Privates.begin();
4869 const auto *ILHS = LHSExprs.begin();
4870 const auto *IRHS = RHSExprs.begin();
4871 for (const Expr *E : ReductionOps) {
4872 if ((*IPriv)->getType()->isArrayType()) {
4873 // Emit reduction for array section.
4874 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4875 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4876 EmitOMPAggregateReduction(
4877 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4878 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4879 emitReductionCombiner(CGF, E);
4880 });
4881 } else {
4882 // Emit reduction for array subscript or single variable.
4883 emitReductionCombiner(CGF, E);
4884 }
4885 ++IPriv;
4886 ++ILHS;
4887 ++IRHS;
4888 }
4889 Scope.ForceCleanup();
4890 CGF.FinishFunction();
4891 return Fn;
4892}
4893
4894void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4895 const Expr *ReductionOp,
4896 const Expr *PrivateRef,
4897 const DeclRefExpr *LHS,
4898 const DeclRefExpr *RHS) {
4899 if (PrivateRef->getType()->isArrayType()) {
4900 // Emit reduction for array section.
4901 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4902 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4903 EmitOMPAggregateReduction(
4904 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4905 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4906 emitReductionCombiner(CGF, ReductionOp);
4907 });
4908 } else {
4909 // Emit reduction for array subscript or single variable.
4910 emitReductionCombiner(CGF, ReductionOp);
4911 }
4912}
4913
4914void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4915 ArrayRef<const Expr *> Privates,
4916 ArrayRef<const Expr *> LHSExprs,
4917 ArrayRef<const Expr *> RHSExprs,
4918 ArrayRef<const Expr *> ReductionOps,
4919 ReductionOptionsTy Options) {
4920 if (!CGF.HaveInsertPoint())
4921 return;
4922
4923 bool WithNowait = Options.WithNowait;
4924 bool SimpleReduction = Options.SimpleReduction;
4925
4926 // The following code should be emitted for the reduction:
4927 //
4928 // static kmp_critical_name lock = { 0 };
4929 //
4930 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4931 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4932 // ...
4933 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4934 // *(Type<n>-1*)rhs[<n>-1]);
4935 // }
4936 //
4937 // ...
4938 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4939 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4940 // RedList, reduce_func, &<lock>)) {
4941 // case 1:
4942 // ...
4943 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4944 // ...
4945 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4946 // break;
4947 // case 2:
4948 // ...
4949 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4950 // ...
4951 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4952 // break;
4953 // default:;
4954 // }
4955 //
4956 // If SimpleReduction is true, only the following code is generated:
4957 // ...
4958 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4959 // ...
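// As a concrete instance of the scheme above, '#pragma omp parallel for
// reduction(+ : s)' yields case 1 performing 's_orig += s_priv' under the
// runtime lock and case 2 performing the same update atomically.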
4960
4961 ASTContext &C = CGM.getContext();
4962
4963 if (SimpleReduction) {
4964 CodeGenFunction::RunCleanupsScope Scope(CGF);
4965 const auto *IPriv = Privates.begin();
4966 const auto *ILHS = LHSExprs.begin();
4967 const auto *IRHS = RHSExprs.begin();
4968 for (const Expr *E : ReductionOps) {
4969 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4970 cast<DeclRefExpr>(*IRHS));
4971 ++IPriv;
4972 ++ILHS;
4973 ++IRHS;
4974 }
4975 return;
4976 }
4977
4978 // 1. Build a list of reduction variables.
4979 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4980 auto Size = RHSExprs.size();
4981 for (const Expr *E : Privates) {
4982 if (E->getType()->isVariablyModifiedType())
4983 // Reserve place for array size.
4984 ++Size;
4985 }
4986 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4987 QualType ReductionArrayTy = C.getConstantArrayType(
4988 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4989 /*IndexTypeQuals=*/0);
4990 RawAddress ReductionList =
4991 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4992 const auto *IPriv = Privates.begin();
4993 unsigned Idx = 0;
4994 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4995 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4996 CGF.Builder.CreateStore(
4997 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4998 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
4999 Elem);
5000 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5001 // Store array size.
5002 ++Idx;
5003 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5004 llvm::Value *Size = CGF.Builder.CreateIntCast(
5005 CGF.getVLASize(
5006 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5007 .NumElts,
5008 CGF.SizeTy, /*isSigned=*/false);
5009 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5010 Elem);
5011 }
5012 }
5013
5014 // 2. Emit reduce_func().
5015 llvm::Function *ReductionFn = emitReductionFunction(
5016 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5017 Privates, LHSExprs, RHSExprs, ReductionOps);
5018
5019 // 3. Create static kmp_critical_name lock = { 0 };
5020 std::string Name = getName({"reduction"});
5021 llvm::Value *Lock = getCriticalRegionLock(Name);
5022
5023 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5024 // RedList, reduce_func, &<lock>);
5025 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5026 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5027 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5028 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5029 ReductionList.getPointer(), CGF.VoidPtrTy);
5030 llvm::Value *Args[] = {
5031 IdentTLoc, // ident_t *<loc>
5032 ThreadId, // i32 <gtid>
5033 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5034 ReductionArrayTySize, // size_type sizeof(RedList)
5035 RL, // void *RedList
5036 ReductionFn, // void (*) (void *, void *) <reduce_func>
5037 Lock // kmp_critical_name *&<lock>
5038 };
5039 llvm::Value *Res = CGF.EmitRuntimeCall(
5040 OMPBuilder.getOrCreateRuntimeFunction(
5041 CGM.getModule(),
5042 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5043 Args);
5044
5045 // 5. Build switch(res)
5046 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5047 llvm::SwitchInst *SwInst =
5048 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5049
5050 // 6. Build case 1:
5051 // ...
5052 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5053 // ...
5054 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5055 // break;
5056 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5057 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5058 CGF.EmitBlock(Case1BB);
5059
5060 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5061 llvm::Value *EndArgs[] = {
5062 IdentTLoc, // ident_t *<loc>
5063 ThreadId, // i32 <gtid>
5064 Lock // kmp_critical_name *&<lock>
5065 };
5066 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5067 CodeGenFunction &CGF, PrePostActionTy &Action) {
5068 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5069 const auto *IPriv = Privates.begin();
5070 const auto *ILHS = LHSExprs.begin();
5071 const auto *IRHS = RHSExprs.begin();
5072 for (const Expr *E : ReductionOps) {
5073 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5074 cast<DeclRefExpr>(*IRHS));
5075 ++IPriv;
5076 ++ILHS;
5077 ++IRHS;
5078 }
5079 };
5080 RegionCodeGenTy RCG(CodeGen);
5081 CommonActionTy Action(
5082 nullptr, {},
5083 OMPBuilder.getOrCreateRuntimeFunction(
5084 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5085 : OMPRTL___kmpc_end_reduce),
5086 EndArgs);
5087 RCG.setAction(Action);
5088 RCG(CGF);
5089
5090 CGF.EmitBranch(DefaultBB);
5091
5092 // 7. Build case 2:
5093 // ...
5094 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5095 // ...
5096 // break;
5097 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5098 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5099 CGF.EmitBlock(Case2BB);
5100
5101 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5102 CodeGenFunction &CGF, PrePostActionTy &Action) {
5103 const auto *ILHS = LHSExprs.begin();
5104 const auto *IRHS = RHSExprs.begin();
5105 const auto *IPriv = Privates.begin();
5106 for (const Expr *E : ReductionOps) {
5107 const Expr *XExpr = nullptr;
5108 const Expr *EExpr = nullptr;
5109 const Expr *UpExpr = nullptr;
5110 BinaryOperatorKind BO = BO_Comma;
5111 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5112 if (BO->getOpcode() == BO_Assign) {
5113 XExpr = BO->getLHS();
5114 UpExpr = BO->getRHS();
5115 }
5116 }
5117 // Try to emit update expression as a simple atomic.
5118 const Expr *RHSExpr = UpExpr;
5119 if (RHSExpr) {
5120 // Analyze RHS part of the whole expression.
5121 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5122 RHSExpr->IgnoreParenImpCasts())) {
5123 // If this is a conditional operator, analyze its condition for
5124 // min/max reduction operator.
5125 RHSExpr = ACO->getCond();
5126 }
5127 if (const auto *BORHS =
5128 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5129 EExpr = BORHS->getRHS();
5130 BO = BORHS->getOpcode();
5131 }
5132 }
5133 if (XExpr) {
5134 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5135 auto &&AtomicRedGen = [BO, VD,
5136 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5137 const Expr *EExpr, const Expr *UpExpr) {
5138 LValue X = CGF.EmitLValue(XExpr);
5139 RValue E;
5140 if (EExpr)
5141 E = CGF.EmitAnyExpr(EExpr);
5142 CGF.EmitOMPAtomicSimpleUpdateExpr(
5143 X, E, BO, /*IsXLHSInRHSPart=*/true,
5144 llvm::AtomicOrdering::Monotonic, Loc,
5145 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5146 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5147 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5148 CGF.emitOMPSimpleStore(
5149 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5150 VD->getType().getNonReferenceType(), Loc);
5151 PrivateScope.addPrivate(VD, LHSTemp);
5152 (void)PrivateScope.Privatize();
5153 return CGF.EmitAnyExpr(UpExpr);
5154 });
5155 };
5156 if ((*IPriv)->getType()->isArrayType()) {
5157 // Emit atomic reduction for array section.
5158 const auto *RHSVar =
5159 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5160 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5161 AtomicRedGen, XExpr, EExpr, UpExpr);
5162 } else {
5163 // Emit atomic reduction for array subscript or single variable.
5164 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5165 }
5166 } else {
5167 // Emit as a critical region.
5168 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5169 const Expr *, const Expr *) {
5170 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5171 std::string Name = RT.getName({"atomic_reduction"});
5172 RT.emitCriticalRegion(
5173 CGF, Name,
5174 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5175 Action.Enter(CGF);
5176 emitReductionCombiner(CGF, E);
5177 },
5178 Loc);
5179 };
5180 if ((*IPriv)->getType()->isArrayType()) {
5181 const auto *LHSVar =
5182 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5183 const auto *RHSVar =
5184 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5185 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5186 CritRedGen);
5187 } else {
5188 CritRedGen(CGF, nullptr, nullptr, nullptr);
5189 }
5190 }
5191 ++ILHS;
5192 ++IRHS;
5193 ++IPriv;
5194 }
5195 };
5196 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5197 if (!WithNowait) {
5198 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5199 llvm::Value *EndArgs[] = {
5200 IdentTLoc, // ident_t *<loc>
5201 ThreadId, // i32 <gtid>
5202 Lock // kmp_critical_name *&<lock>
5203 };
5204 CommonActionTy Action(nullptr, {},
5205 OMPBuilder.getOrCreateRuntimeFunction(
5206 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5207 EndArgs);
5208 AtomicRCG.setAction(Action);
5209 AtomicRCG(CGF);
5210 } else {
5211 AtomicRCG(CGF);
5212 }
5213
5214 CGF.EmitBranch(DefaultBB);
5215 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5216}
5217
5218/// Generates unique name for artificial threadprivate variables.
5219/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5220static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5221 const Expr *Ref) {
5222 SmallString<256> Buffer;
5223 llvm::raw_svector_ostream Out(Buffer);
5224 const clang::DeclRefExpr *DE;
5225 const VarDecl *D = ::getBaseDecl(Ref, DE);
5226 if (!D)
5227 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5228 D = D->getCanonicalDecl();
5229 std::string Name = CGM.getOpenMPRuntime().getName(
5230 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5231 Out << Prefix << Name << "_"
5232 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5233 return std::string(Out.str());
5234}
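// For instance, with Prefix == "reduction_size" and a local variable 'x'
// whose start location has raw encoding 12345 (a made-up value), the
// generated name is "reduction_size.x_12345".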
5235
5236/// Emits reduction initializer function:
5237/// \code
5238/// void @.red_init(void* %arg, void* %orig) {
5239/// %0 = bitcast void* %arg to <type>*
5240/// store <type> <init>, <type>* %0
5241/// ret void
5242/// }
5243/// \endcode
5244static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5245 SourceLocation Loc,
5246 ReductionCodeGen &RCG, unsigned N) {
5247 ASTContext &C = CGM.getContext();
5248 QualType VoidPtrTy = C.VoidPtrTy;
5249 VoidPtrTy.addRestrict();
5250 FunctionArgList Args;
5251 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5252 ImplicitParamKind::Other);
5253 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5254 ImplicitParamKind::Other);
5255 Args.emplace_back(&Param);
5256 Args.emplace_back(&ParamOrig);
5257 const auto &FnInfo =
5258 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5259 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5260 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5261 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5262 Name, &CGM.getModule());
5263 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5264 Fn->setDoesNotRecurse();
5265 CodeGenFunction CGF(CGM);
5266 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5267 QualType PrivateType = RCG.getPrivateType(N);
5268 Address PrivateAddr = CGF.EmitLoadOfPointer(
5269 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5270 C.getPointerType(PrivateType)->castAs<PointerType>());
5271 llvm::Value *Size = nullptr;
5272 // If the size of the reduction item is non-constant, load it from global
5273 // threadprivate variable.
5274 if (RCG.getSizes(N).second) {
5275 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5276 CGF, CGM.getContext().getSizeType(),
5277 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5278 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5279 CGM.getContext().getSizeType(), Loc);
5280 }
5281 RCG.emitAggregateType(CGF, N, Size);
5282 Address OrigAddr = Address::invalid();
5283 // If the initializer uses the initializer from the declare reduction
5284 // construct, emit a pointer to the address of the original reduction item
5285 // (required by the reduction initializer).
5286 if (RCG.usesReductionInitializer(N)) {
5287 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5288 OrigAddr = CGF.EmitLoadOfPointer(
5289 SharedAddr,
5290 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5291 }
5292 // Emit the initializer:
5293 // %0 = bitcast void* %arg to <type>*
5294 // store <type> <init>, <type>* %0
5295 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5296 [](CodeGenFunction &) { return false; });
5297 CGF.FinishFunction();
5298 return Fn;
5299}
5300
5301/// Emits reduction combiner function:
5302/// \code
5303/// void @.red_comb(void* %arg0, void* %arg1) {
5304/// %lhs = bitcast void* %arg0 to <type>*
5305/// %rhs = bitcast void* %arg1 to <type>*
5306/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5307/// store <type> %2, <type>* %lhs
5308/// ret void
5309/// }
5310/// \endcode
5311static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5312 SourceLocation Loc,
5313 ReductionCodeGen &RCG, unsigned N,
5314 const Expr *ReductionOp,
5315 const Expr *LHS, const Expr *RHS,
5316 const Expr *PrivateRef) {
5317 ASTContext &C = CGM.getContext();
5318 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5319 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5320 FunctionArgList Args;
5321 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5322 C.VoidPtrTy, ImplicitParamKind::Other);
5323 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5324 ImplicitParamKind::Other);
5325 Args.emplace_back(&ParamInOut);
5326 Args.emplace_back(&ParamIn);
5327 const auto &FnInfo =
5328 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5329 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5330 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5331 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5332 Name, &CGM.getModule());
5333 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5334 Fn->setDoesNotRecurse();
5335 CodeGenFunction CGF(CGM);
5336 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5337 llvm::Value *Size = nullptr;
5338 // If the size of the reduction item is non-constant, load it from global
5339 // threadprivate variable.
5340 if (RCG.getSizes(N).second) {
5341 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5342 CGF, CGM.getContext().getSizeType(),
5343 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5344 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5345 CGM.getContext().getSizeType(), Loc);
5346 }
5347 RCG.emitAggregateType(CGF, N, Size);
5348 // Remap lhs and rhs variables to the addresses of the function arguments.
5349 // %lhs = bitcast void* %arg0 to <type>*
5350 // %rhs = bitcast void* %arg1 to <type>*
5351 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5352 PrivateScope.addPrivate(
5353 LHSVD,
5354 // Pull out the pointer to the variable.
5355 CGF.EmitLoadOfPointer(
5356 CGF.GetAddrOfLocalVar(&ParamInOut)
5357 .withElementType(CGF.Builder.getPtrTy(0)),
5358 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5359 PrivateScope.addPrivate(
5360 RHSVD,
5361 // Pull out the pointer to the variable.
5362 CGF.EmitLoadOfPointer(
5363 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5364 CGF.Builder.getPtrTy(0)),
5365 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5366 PrivateScope.Privatize();
5367 // Emit the combiner body:
5368 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5369 // store <type> %2, <type>* %lhs
5370 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5371 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5372 cast<DeclRefExpr>(RHS));
5373 CGF.FinishFunction();
5374 return Fn;
5375}
5376
5377/// Emits reduction finalizer function:
5378/// \code
5379/// void @.red_fini(void* %arg) {
5380/// %0 = bitcast void* %arg to <type>*
5381/// <destroy>(<type>* %0)
5382/// ret void
5383/// }
5384/// \endcode
5385static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5386 SourceLocation Loc,
5387 ReductionCodeGen &RCG, unsigned N) {
5388 if (!RCG.needCleanups(N))
5389 return nullptr;
5390 ASTContext &C = CGM.getContext();
5391 FunctionArgList Args;
5392 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5393 ImplicitParamKind::Other);
5394 Args.emplace_back(&Param);
5395 const auto &FnInfo =
5396 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5397 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5398 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5399 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5400 Name, &CGM.getModule());
5401 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5402 Fn->setDoesNotRecurse();
5403 CodeGenFunction CGF(CGM);
5404 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5405 Address PrivateAddr = CGF.EmitLoadOfPointer(
5406 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5407 llvm::Value *Size = nullptr;
5408 // If the size of the reduction item is non-constant, load it from global
5409 // threadprivate variable.
5410 if (RCG.getSizes(N).second) {
5411 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5412 CGF, CGM.getContext().getSizeType(),
5413 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5414 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5415 CGM.getContext().getSizeType(), Loc);
5416 }
5417 RCG.emitAggregateType(CGF, N, Size);
5418 // Emit the finalizer body:
5419 // <destroy>(<type>* %0)
5420 RCG.emitCleanups(CGF, N, PrivateAddr);
5421 CGF.FinishFunction(Loc);
5422 return Fn;
5423}
5424
5425llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5426 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5427 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5428 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5429 return nullptr;
5430
5431 // Build typedef struct:
5432 // kmp_taskred_input {
5433 // void *reduce_shar; // shared reduction item
5434 // void *reduce_orig; // original reduction item used for initialization
5435 // size_t reduce_size; // size of data item
5436 // void *reduce_init; // data initialization routine
5437 // void *reduce_fini; // data finalization routine
5438 // void *reduce_comb; // data combiner routine
5439 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5440 // } kmp_taskred_input_t;
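// A typical trigger for this path (illustrative) is a task reduction such as
//   #pragma omp taskgroup task_reduction(+ : x)
// One kmp_taskred_input_t entry is filled in below per reduction item, and
// the whole array is passed to __kmpc_taskred_init (or to
// __kmpc_taskred_modifier_init when a task reduction modifier is present).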
5441 ASTContext &C = CGM.getContext();
5442 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5443 RD->startDefinition();
5444 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5445 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5446 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5447 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5448 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5449 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5450 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5451 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5452 RD->completeDefinition();
5453 QualType RDType = C.getRecordType(RD);
5454 unsigned Size = Data.ReductionVars.size();
5455 llvm::APInt ArraySize(/*numBits=*/64, Size);
5456 QualType ArrayRDType =
5457 C.getConstantArrayType(RDType, ArraySize, nullptr,
5458 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5459 // kmp_task_red_input_t .rd_input.[Size];
5460 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5461 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5462 Data.ReductionCopies, Data.ReductionOps);
5463 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5464 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5465 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5466 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5467 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5468 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5469 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5470 ".rd_input.gep.");
5471 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5472 // ElemLVal.reduce_shar = &Shareds[Cnt];
5473 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5474 RCG.emitSharedOrigLValue(CGF, Cnt);
5475 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5476 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5477 // ElemLVal.reduce_orig = &Origs[Cnt];
5478 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5479 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5480 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5481 RCG.emitAggregateType(CGF, Cnt);
5482 llvm::Value *SizeValInChars;
5483 llvm::Value *SizeVal;
5484 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5485 // We use delayed creation/initialization for VLAs and array sections. It is
5486 // required because the runtime does not provide a way to pass the sizes of
5487 // VLAs/array sections to the initializer/combiner/finalizer functions.
5488 // Instead, threadprivate global variables are used to store these values
5489 // and make them available to those functions.
5490 bool DelayedCreation = !!SizeVal;
5491 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5492 /*isSigned=*/false);
5493 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5494 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5495 // ElemLVal.reduce_init = init;
5496 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5497 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5498 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5499 // ElemLVal.reduce_fini = fini;
5500 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5501 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5502 llvm::Value *FiniAddr =
5503 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5504 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5505 // ElemLVal.reduce_comb = comb;
5506 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5507 llvm::Value *CombAddr = emitReduceCombFunction(
5508 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5509 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5510 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5511 // ElemLVal.flags = 0;
5512 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5513 if (DelayedCreation) {
5514 CGF.EmitStoreOfScalar(
5515 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5516 FlagsLVal);
5517 } else
5518 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5519 }
5520 if (Data.IsReductionWithTaskMod) {
5521 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5522 // is_ws, int num, void *data);
5523 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5524 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5525 CGM.IntTy, /*isSigned=*/true);
5526 llvm::Value *Args[] = {
5527 IdentTLoc, GTid,
5528 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5529 /*isSigned=*/true),
5530 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5531 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5532 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5533 return CGF.EmitRuntimeCall(
5534 OMPBuilder.getOrCreateRuntimeFunction(
5535 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5536 Args);
5537 }
5538 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5539 llvm::Value *Args[] = {
5540 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5541 /*isSigned=*/true),
5542 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5543 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5544 CGM.VoidPtrTy)};
5545 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5546 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5547 Args);
5548}
5549
5550void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5551 SourceLocation Loc,
5552 bool IsWorksharingReduction) {
5553 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5554 // is_ws, int num, void *data);
5555 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5556 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5557 CGM.IntTy, /*isSigned=*/true);
5558 llvm::Value *Args[] = {IdentTLoc, GTid,
5559 llvm::ConstantInt::get(CGM.IntTy,
5560 IsWorksharingReduction ? 1 : 0,
5561 /*isSigned=*/true)};
5562 (void)CGF.EmitRuntimeCall(
5563 OMPBuilder.getOrCreateRuntimeFunction(
5564 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5565 Args);
5566}
5567
5568void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5569 SourceLocation Loc,
5570 ReductionCodeGen &RCG,
5571 unsigned N) {
5572 auto Sizes = RCG.getSizes(N);
5573 // Emit a threadprivate global variable if the type is non-constant
5574 // (Sizes.second != nullptr).
5575 if (Sizes.second) {
5576 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5577 /*isSigned=*/false);
5578 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5579 CGF, CGM.getContext().getSizeType(),
5580 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5581 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5582 }
5583}
5584
5585Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5586 SourceLocation Loc,
5587 llvm::Value *ReductionsPtr,
5588 LValue SharedLVal) {
5589 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5590 // *d);
5591 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5592 CGM.IntTy,
5593 /*isSigned=*/true),
5594 ReductionsPtr,
5595 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5596 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5597 return Address(
5598 CGF.EmitRuntimeCall(
5599 OMPBuilder.getOrCreateRuntimeFunction(
5600 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5601 Args),
5602 CGF.Int8Ty, SharedLVal.getAlignment());
5603}
5604
5605void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5606 const OMPTaskDataTy &Data) {
5607 if (!CGF.HaveInsertPoint())
5608 return;
5609
5610 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5611 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5612 OMPBuilder.createTaskwait(CGF.Builder);
5613 } else {
5614 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5615 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5616 auto &M = CGM.getModule();
5617 Address DependenciesArray = Address::invalid();
5618 llvm::Value *NumOfElements;
5619 std::tie(NumOfElements, DependenciesArray) =
5620 emitDependClause(CGF, Data.Dependences, Loc);
5621 if (!Data.Dependences.empty()) {
5622 llvm::Value *DepWaitTaskArgs[7];
5623 DepWaitTaskArgs[0] = UpLoc;
5624 DepWaitTaskArgs[1] = ThreadID;
5625 DepWaitTaskArgs[2] = NumOfElements;
5626 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5627 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5628 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5629 DepWaitTaskArgs[6] =
5630 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5631
5632 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5633
5634 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5635 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5636 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5637 // kmp_int32 has_no_wait); if dependence info is specified.
5638 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5639 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5640 DepWaitTaskArgs);
5641
5642 } else {
5643
5644 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5645 // global_tid);
5646 llvm::Value *Args[] = {UpLoc, ThreadID};
5647 // Ignore return result until untied tasks are supported.
5648 CGF.EmitRuntimeCall(
5649 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5650 Args);
5651 }
5652 }
5653
5654 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5655 Region->emitUntiedSwitch(CGF);
5656}
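// The two runtime paths above correspond, for example, to
//   #pragma omp taskwait                  -> __kmpc_omp_taskwait
//   #pragma omp taskwait depend(in : x)   -> __kmpc_omp_taskwait_deps_51
// (the OpenMPIRBuilder path is only taken for the dependence-free form).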
5657
5658void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5659 OpenMPDirectiveKind InnerKind,
5660 const RegionCodeGenTy &CodeGen,
5661 bool HasCancel) {
5662 if (!CGF.HaveInsertPoint())
5663 return;
5664 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5665 InnerKind != OMPD_critical &&
5666 InnerKind != OMPD_master &&
5667 InnerKind != OMPD_masked);
5668 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5669}
5670
5671namespace {
5672enum RTCancelKind {
5673 CancelNoreq = 0,
5674 CancelParallel = 1,
5675 CancelLoop = 2,
5676 CancelSections = 3,
5677 CancelTaskgroup = 4
5678};
5679} // anonymous namespace
5680
5681static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5682 RTCancelKind CancelKind = CancelNoreq;
5683 if (CancelRegion == OMPD_parallel)
5684 CancelKind = CancelParallel;
5685 else if (CancelRegion == OMPD_for)
5686 CancelKind = CancelLoop;
5687 else if (CancelRegion == OMPD_sections)
5688 CancelKind = CancelSections;
5689 else {
5690 assert(CancelRegion == OMPD_taskgroup);
5691 CancelKind = CancelTaskgroup;
5692 }
5693 return CancelKind;
5694}
5695
5696void CGOpenMPRuntime::emitCancellationPointCall(
5697 CodeGenFunction &CGF, SourceLocation Loc,
5698 OpenMPDirectiveKind CancelRegion) {
5699 if (!CGF.HaveInsertPoint())
5700 return;
5701 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5702 // global_tid, kmp_int32 cncl_kind);
5703 if (auto *OMPRegionInfo =
5704 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5705 // For 'cancellation point taskgroup', the task region info may not have a
5706 // cancel. This may instead happen in another adjacent task.
5707 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5708 llvm::Value *Args[] = {
5709 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5710 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5711 // Ignore return result until untied tasks are supported.
5712 llvm::Value *Result = CGF.EmitRuntimeCall(
5713 OMPBuilder.getOrCreateRuntimeFunction(
5714 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5715 Args);
5716 // if (__kmpc_cancellationpoint()) {
5717 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5718 // exit from construct;
5719 // }
5720 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5721 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5722 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5723 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5724 CGF.EmitBlock(ExitBB);
5725 if (CancelRegion == OMPD_parallel)
5726 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5727 // exit from construct;
5728 CodeGenFunction::JumpDest CancelDest =
5729 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5730 CGF.EmitBranchThroughCleanup(CancelDest);
5731 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5732 }
5733 }
5734}
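// For example, '#pragma omp cancellation point for' inside a cancellable
// worksharing loop expands to roughly
//   if (__kmpc_cancellationpoint(loc, gtid, CancelLoop))
//     goto <cleanup-and-exit of the 'for' region>;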
5735
5736void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5737 const Expr *IfCond,
5738 OpenMPDirectiveKind CancelRegion) {
5739 if (!CGF.HaveInsertPoint())
5740 return;
5741 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5742 // kmp_int32 cncl_kind);
5743 auto &M = CGM.getModule();
5744 if (auto *OMPRegionInfo =
5745 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5746 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5747 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5748 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5749 llvm::Value *Args[] = {
5750 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5751 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5752 // Ignore return result until untied tasks are supported.
5753 llvm::Value *Result = CGF.EmitRuntimeCall(
5754 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5755 // if (__kmpc_cancel()) {
5756 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5757 // exit from construct;
5758 // }
5759 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5760 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5761 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5762 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5763 CGF.EmitBlock(ExitBB);
5764 if (CancelRegion == OMPD_parallel)
5765 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5766 // exit from construct;
5767 CodeGenFunction::JumpDest CancelDest =
5768 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5769 CGF.EmitBranchThroughCleanup(CancelDest);
5770 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5771 };
5772 if (IfCond) {
5773 emitIfClause(CGF, IfCond, ThenGen,
5774 [](CodeGenFunction &, PrePostActionTy &) {});
5775 } else {
5776 RegionCodeGenTy ThenRCG(ThenGen);
5777 ThenRCG(CGF);
5778 }
5779 }
5780}
5781
5782namespace {
5783/// Cleanup action for uses_allocators support.
5784class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5785 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5786
5787public:
5788 OMPUsesAllocatorsActionTy(
5789 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5790 : Allocators(Allocators) {}
5791 void Enter(CodeGenFunction &CGF) override {
5792 if (!CGF.HaveInsertPoint())
5793 return;
5794 for (const auto &AllocatorData : Allocators) {
5795 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5796 CGF, AllocatorData.first, AllocatorData.second);
5797 }
5798 }
5799 void Exit(CodeGenFunction &CGF) override {
5800 if (!CGF.HaveInsertPoint())
5801 return;
5802 for (const auto &AllocatorData : Allocators) {
5803 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5804 AllocatorData.first);
5805 }
5806 }
5807};
5808} // namespace
5809
5810void CGOpenMPRuntime::emitTargetOutlinedFunction(
5811 const OMPExecutableDirective &D, StringRef ParentName,
5812 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5813 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5814 assert(!ParentName.empty() && "Invalid target entry parent name!");
5815 HasEmittedTargetRegion = true;
5816 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5817 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5818 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5819 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5820 if (!D.AllocatorTraits)
5821 continue;
5822 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5823 }
5824 }
5825 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5826 CodeGen.setAction(UsesAllocatorAction);
5827 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5828 IsOffloadEntry, CodeGen);
5829}
5830
5831void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5832 const Expr *Allocator,
5833 const Expr *AllocatorTraits) {
5834 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5835 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5836 // Use default memspace handle.
5837 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5838 llvm::Value *NumTraits = llvm::ConstantInt::get(
5839 CGF.IntTy, cast<ConstantArrayType>(
5840 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5841 ->getSize()
5842 .getLimitedValue());
5843 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5844 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5845 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5846 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5847 AllocatorTraitsLVal.getBaseInfo(),
5848 AllocatorTraitsLVal.getTBAAInfo());
5849 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5850
5851 llvm::Value *AllocatorVal =
5852 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5853 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5854 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5855 // Store to allocator.
5856 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5857 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5858 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5859 AllocatorVal =
5860 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5861 Allocator->getType(), Allocator->getExprLoc());
5862 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5863}
5864
5865void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5866 const Expr *Allocator) {
5867 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5868 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5869 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5870 llvm::Value *AllocatorVal =
5871 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5872 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5873 CGF.getContext().VoidPtrTy,
5874 Allocator->getExprLoc());
5875 (void)CGF.EmitRuntimeCall(
5876 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5877 OMPRTL___kmpc_destroy_allocator),
5878 {ThreadId, AllocatorVal});
5879}
5880
5881void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5882 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5883 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
5884 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
5885 "invalid default attrs structure");
5886 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
5887 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
5888
5889 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
5890 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5891 /*UpperBoundOnly=*/true);
5892
5893 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5894 for (auto *A : C->getAttrs()) {
5895 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5896 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5897 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5898 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5899 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5900 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5901 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5902 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5903 &AttrMaxThreadsVal);
5904 else
5905 continue;
5906
5907 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
5908 if (AttrMaxThreadsVal > 0)
5909 MaxThreadsVal = MaxThreadsVal > 0
5910 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5911 : AttrMaxThreadsVal;
5912 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
5913 if (AttrMaxBlocksVal > 0)
5914 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5915 : AttrMaxBlocksVal;
5916 }
5917 }
5918}
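// As an illustration, a directive like
//   #pragma omp target teams ompx_attribute(__attribute__((launch_bounds(128, 2))))
// feeds the CUDALaunchBoundsAttr branch above, capping the maximum number of
// threads at 128 and raising the minimum number of teams to 2 on top of the
// bounds derived from the directive itself.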
5919
5920void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5921 const OMPExecutableDirective &D, StringRef ParentName,
5922 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5923 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5924
5925 llvm::TargetRegionEntryInfo EntryInfo =
5926 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5927
5928 CodeGenFunction CGF(CGM, true);
5929 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5930 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5931 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5932
5933 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5934 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5935 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5936 };
5937
5938 cantFail(OMPBuilder.emitTargetRegionFunction(
5939 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
5940 OutlinedFnID));
5941
5942 if (!OutlinedFn)
5943 return;
5944
5945 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5946
5947 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5948 for (auto *A : C->getAttrs()) {
5949 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5950 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5951 }
5952 }
5953}
5954
5955/// Checks if the expression is constant or does not have non-trivial function
5956/// calls.
5957static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5958 // We can skip constant expressions.
5959 // We can skip expressions with trivial calls or simple expressions.
5960 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5961 !E->hasNonTrivialCall(Ctx)) &&
5962 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5963}
5964
5965const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5966 const Stmt *Body) {
5967 const Stmt *Child = Body->IgnoreContainers();
5968 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5969 Child = nullptr;
5970 for (const Stmt *S : C->body()) {
5971 if (const auto *E = dyn_cast<Expr>(S)) {
5972 if (isTrivial(Ctx, E))
5973 continue;
5974 }
5975 // Some of the statements can be ignored.
5976 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5977 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5978 continue;
5979 // Analyze declarations.
5980 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5981 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5982 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5983 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5984 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5985 isa<UsingDirectiveDecl>(D) ||
5986 isa<OMPDeclareReductionDecl>(D) ||
5987 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
5988 return true;
5989 const auto *VD = dyn_cast<VarDecl>(D);
5990 if (!VD)
5991 return false;
5992 return VD->hasGlobalStorage() || !VD->isUsed();
5993 }))
5994 continue;
5995 }
5996 // Found multiple children - cannot get the one child only.
5997 if (Child)
5998 return nullptr;
5999 Child = S;
6000 }
6001 if (Child)
6002 Child = Child->IgnoreContainers();
6003 }
6004 return Child;
6005}
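// For example, for a captured body like
//   { int unused; ; #pragma omp teams ... }
// the unused declaration and the null statement are skipped and the nested
// teams directive is returned as the single child, whereas two non-trivial
// statements make this function return nullptr.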
6006
6007const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6008 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6009 int32_t &MaxTeamsVal) {
6010
6011 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6012 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6013 "Expected target-based executable directive.");
6014 switch (DirectiveKind) {
6015 case OMPD_target: {
6016 const auto *CS = D.getInnermostCapturedStmt();
6017 const auto *Body =
6018 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6019 const Stmt *ChildStmt =
6020 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6021 if (const auto *NestedDir =
6022 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6023 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6024 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6025 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6026 ->getNumTeams()
6027 .front();
6028 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6029 if (auto Constant =
6030 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6031 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6032 return NumTeams;
6033 }
6034 MinTeamsVal = MaxTeamsVal = 0;
6035 return nullptr;
6036 }
6037 MinTeamsVal = MaxTeamsVal = 1;
6038 return nullptr;
6039 }
6040 // A value of -1 signals that no teams region needs to be emitted.
6041 MinTeamsVal = MaxTeamsVal = -1;
6042 return nullptr;
6043 }
6044 case OMPD_target_teams_loop:
6045 case OMPD_target_teams:
6046 case OMPD_target_teams_distribute:
6047 case OMPD_target_teams_distribute_simd:
6048 case OMPD_target_teams_distribute_parallel_for:
6049 case OMPD_target_teams_distribute_parallel_for_simd: {
6050 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6051 const Expr *NumTeams =
6052 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6053 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6054 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6055 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6056 return NumTeams;
6057 }
6058 MinTeamsVal = MaxTeamsVal = 0;
6059 return nullptr;
6060 }
6061 case OMPD_target_parallel:
6062 case OMPD_target_parallel_for:
6063 case OMPD_target_parallel_for_simd:
6064 case OMPD_target_parallel_loop:
6065 case OMPD_target_simd:
6066 MinTeamsVal = MaxTeamsVal = 1;
6067 return nullptr;
6068 case OMPD_parallel:
6069 case OMPD_for:
6070 case OMPD_parallel_for:
6071 case OMPD_parallel_loop:
6072 case OMPD_parallel_master:
6073 case OMPD_parallel_sections:
6074 case OMPD_for_simd:
6075 case OMPD_parallel_for_simd:
6076 case OMPD_cancel:
6077 case OMPD_cancellation_point:
6078 case OMPD_ordered:
6079 case OMPD_threadprivate:
6080 case OMPD_allocate:
6081 case OMPD_task:
6082 case OMPD_simd:
6083 case OMPD_tile:
6084 case OMPD_unroll:
6085 case OMPD_sections:
6086 case OMPD_section:
6087 case OMPD_single:
6088 case OMPD_master:
6089 case OMPD_critical:
6090 case OMPD_taskyield:
6091 case OMPD_barrier:
6092 case OMPD_taskwait:
6093 case OMPD_taskgroup:
6094 case OMPD_atomic:
6095 case OMPD_flush:
6096 case OMPD_depobj:
6097 case OMPD_scan:
6098 case OMPD_teams:
6099 case OMPD_target_data:
6100 case OMPD_target_exit_data:
6101 case OMPD_target_enter_data:
6102 case OMPD_distribute:
6103 case OMPD_distribute_simd:
6104 case OMPD_distribute_parallel_for:
6105 case OMPD_distribute_parallel_for_simd:
6106 case OMPD_teams_distribute:
6107 case OMPD_teams_distribute_simd:
6108 case OMPD_teams_distribute_parallel_for:
6109 case OMPD_teams_distribute_parallel_for_simd:
6110 case OMPD_target_update:
6111 case OMPD_declare_simd:
6112 case OMPD_declare_variant:
6113 case OMPD_begin_declare_variant:
6114 case OMPD_end_declare_variant:
6115 case OMPD_declare_target:
6116 case OMPD_end_declare_target:
6117 case OMPD_declare_reduction:
6118 case OMPD_declare_mapper:
6119 case OMPD_taskloop:
6120 case OMPD_taskloop_simd:
6121 case OMPD_master_taskloop:
6122 case OMPD_master_taskloop_simd:
6123 case OMPD_parallel_master_taskloop:
6124 case OMPD_parallel_master_taskloop_simd:
6125 case OMPD_requires:
6126 case OMPD_metadirective:
6127 case OMPD_unknown:
6128 break;
6129 default:
6130 break;
6131 }
6132 llvm_unreachable("Unexpected directive kind.");
6133}
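// Editorial example: for '#pragma omp target teams num_teams(8)' the clause
// expression is returned and, since 8 is an integer constant expression,
// MinTeamsVal == MaxTeamsVal == 8. For '#pragma omp target parallel' no
// expression is returned and both values are set to 1 (a single team).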
6134
6135llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6136 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6137 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6138 "Clauses associated with the teams directive expected to be emitted "
6139 "only for the host!");
6140 CGBuilderTy &Bld = CGF.Builder;
6141 int32_t MinNT = -1, MaxNT = -1;
6142 const Expr *NumTeams =
6143 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6144 if (NumTeams != nullptr) {
6145 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6146
6147 switch (DirectiveKind) {
6148 case OMPD_target: {
6149 const auto *CS = D.getInnermostCapturedStmt();
6150 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6151 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6152 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6153 /*IgnoreResultAssign*/ true);
6154 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6155 /*isSigned=*/true);
6156 }
6157 case OMPD_target_teams:
6158 case OMPD_target_teams_distribute:
6159 case OMPD_target_teams_distribute_simd:
6160 case OMPD_target_teams_distribute_parallel_for:
6161 case OMPD_target_teams_distribute_parallel_for_simd: {
6162 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6163 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6164 /*IgnoreResultAssign*/ true);
6165 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6166 /*isSigned=*/true);
6167 }
6168 default:
6169 break;
6170 }
6171 }
6172
6173 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6174 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6175}
6176
6177 /// Check for a num threads constant value (stored in \p UpperBound), or an
6178 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6179 /// store the condition in \p CondVal. If \p E and \p CondVal respectively are
6180 /// nullptr, no expression evaluation is performed.
6181static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6182 const Expr **E, int32_t &UpperBound,
6183 bool UpperBoundOnly, llvm::Value **CondVal) {
6184 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6185 CGF.getContext(), CS->getCapturedStmt());
6186 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6187 if (!Dir)
6188 return;
6189
6190 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6191 // Handle if clause. If if clause present, the number of threads is
6192 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
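  // Editorial example: '#pragma omp parallel if(c) num_threads(8)' yields
  // 'c ? 8 : 1' at runtime, while a constant-false 'if(0)' short-circuits
  // below into UpperBound = 1 with no expression emitted.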
6193 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6194 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6195 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6196 const OMPIfClause *IfClause = nullptr;
6197 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6198 if (C->getNameModifier() == OMPD_unknown ||
6199 C->getNameModifier() == OMPD_parallel) {
6200 IfClause = C;
6201 break;
6202 }
6203 }
6204 if (IfClause) {
6205 const Expr *CondExpr = IfClause->getCondition();
6206 bool Result;
6207 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6208 if (!Result) {
6209 UpperBound = 1;
6210 return;
6211 }
6212 } else {
6213 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6214 if (const auto *PreInit =
6215 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6216 for (const auto *I : PreInit->decls()) {
6217 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6218 CGF.EmitVarDecl(cast<VarDecl>(*I));
6219 } else {
6220 CodeGenFunction::AutoVarEmission Emission =
6221 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6222 CGF.EmitAutoVarCleanups(Emission);
6223 }
6224 }
6225 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6226 }
6227 }
6228 }
6229 }
6230 // Check the value of num_threads clause iff if clause was not specified
6231 // or is not evaluated to false.
6232 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6233 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6234 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6235 const auto *NumThreadsClause =
6236 Dir->getSingleClause<OMPNumThreadsClause>();
6237 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6238 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6239 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6240 UpperBound =
6241 UpperBound
6242 ? Constant->getZExtValue()
6243 : std::min(UpperBound,
6244 static_cast<int32_t>(Constant->getZExtValue()));
6245 // If we haven't found an upper bound, remember we saw a thread limiting
6246 // clause.
6247 if (UpperBound == -1)
6248 UpperBound = 0;
6249 if (!E)
6250 return;
6251 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6252 if (const auto *PreInit =
6253 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6254 for (const auto *I : PreInit->decls()) {
6255 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6256 CGF.EmitVarDecl(cast<VarDecl>(*I));
6257 } else {
6258 CodeGenFunction::AutoVarEmission Emission =
6259 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6260 CGF.EmitAutoVarCleanups(Emission);
6261 }
6262 }
6263 }
6264 *E = NTExpr;
6265 }
6266 return;
6267 }
6268 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6269 UpperBound = 1;
6270}
6271
6272const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6273 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6274 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6275 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6276 "Clauses associated with the teams directive expected to be emitted "
6277 "only for the host!");
6278 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6279 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6280 "Expected target-based executable directive.");
6281
6282 const Expr *NT = nullptr;
6283 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6284
6285 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6286 if (E->isIntegerConstantExpr(CGF.getContext())) {
6287 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6288 UpperBound = UpperBound ? Constant->getZExtValue()
6289 : std::min(UpperBound,
6290 int32_t(Constant->getZExtValue()));
6291 }
6292 // If we haven't found an upper bound, remember we saw a thread limiting
6293 // clause.
6294 if (UpperBound == -1)
6295 UpperBound = 0;
6296 if (EPtr)
6297 *EPtr = E;
6298 };
6299
6300 auto ReturnSequential = [&]() {
6301 UpperBound = 1;
6302 return NT;
6303 };
6304
6305 switch (DirectiveKind) {
6306 case OMPD_target: {
6307 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6308 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6309 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6310 CGF.getContext(), CS->getCapturedStmt());
6311 // TODO: The standard is not clear how to resolve two thread limit clauses,
6312 // let's pick the teams one if it's present, otherwise the target one.
6313 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6314 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6315 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6316 ThreadLimitClause = TLC;
6317 if (ThreadLimitExpr) {
6318 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6319 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6320 CodeGenFunction::LexicalScope Scope(
6321 CGF,
6322 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6323 if (const auto *PreInit =
6324 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6325 for (const auto *I : PreInit->decls()) {
6326 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6327 CGF.EmitVarDecl(cast<VarDecl>(*I));
6328 } else {
6329 CodeGenFunction::AutoVarEmission Emission =
6330 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6331 CGF.EmitAutoVarCleanups(Emission);
6332 }
6333 }
6334 }
6335 }
6336 }
6337 }
6338 if (ThreadLimitClause)
6339 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6340 ThreadLimitExpr);
6341 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6342 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6343 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6344 CS = Dir->getInnermostCapturedStmt();
6345 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6346 CGF.getContext(), CS->getCapturedStmt());
6347 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6348 }
6349 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6350 CS = Dir->getInnermostCapturedStmt();
6351 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6352 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6353 return ReturnSequential();
6354 }
6355 return NT;
6356 }
6357 case OMPD_target_teams: {
6358 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6359 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6360 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6361 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6362 ThreadLimitExpr);
6363 }
6364 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6365 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6366 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6367 CGF.getContext(), CS->getCapturedStmt());
6368 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6369 if (Dir->getDirectiveKind() == OMPD_distribute) {
6370 CS = Dir->getInnermostCapturedStmt();
6371 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6372 }
6373 }
6374 return NT;
6375 }
6376 case OMPD_target_teams_distribute:
6377 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6378 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6379 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6380 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6381 ThreadLimitExpr);
6382 }
6383 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6384 UpperBoundOnly, CondVal);
6385 return NT;
6386 case OMPD_target_teams_loop:
6387 case OMPD_target_parallel_loop:
6388 case OMPD_target_parallel:
6389 case OMPD_target_parallel_for:
6390 case OMPD_target_parallel_for_simd:
6391 case OMPD_target_teams_distribute_parallel_for:
6392 case OMPD_target_teams_distribute_parallel_for_simd: {
6393 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6394 const OMPIfClause *IfClause = nullptr;
6395 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6396 if (C->getNameModifier() == OMPD_unknown ||
6397 C->getNameModifier() == OMPD_parallel) {
6398 IfClause = C;
6399 break;
6400 }
6401 }
6402 if (IfClause) {
6403 const Expr *Cond = IfClause->getCondition();
6404 bool Result;
6405 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6406 if (!Result)
6407 return ReturnSequential();
6408 } else {
6409 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6410 *CondVal = CGF.EvaluateExprAsBool(Cond);
6411 }
6412 }
6413 }
6414 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6415 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6416 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6417 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6418 ThreadLimitExpr);
6419 }
6420 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6421 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6422 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6423 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6424 return NumThreadsClause->getNumThreads();
6425 }
6426 return NT;
6427 }
6428 case OMPD_target_teams_distribute_simd:
6429 case OMPD_target_simd:
6430 return ReturnSequential();
6431 default:
6432 break;
6433 }
6434 llvm_unreachable("Unsupported directive kind.");
6435}
6436
6437llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6438 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6439 llvm::Value *NumThreadsVal = nullptr;
6440 llvm::Value *CondVal = nullptr;
6441 llvm::Value *ThreadLimitVal = nullptr;
6442 const Expr *ThreadLimitExpr = nullptr;
6443 int32_t UpperBound = -1;
6444
6445 const Expr *NT = getNumThreadsExprForTargetDirective(
6446 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6447 &ThreadLimitExpr);
6448
6449 // Thread limit expressions are used below, emit them.
6450 if (ThreadLimitExpr) {
6451 ThreadLimitVal =
6452 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6453 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6454 /*isSigned=*/false);
6455 }
6456
6457 // Generate the num threads expression.
6458 if (UpperBound == 1) {
6459 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6460 } else if (NT) {
6461 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6462 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6463 /*isSigned=*/false);
6464 } else if (ThreadLimitVal) {
6465 // If we do not have a num threads value but a thread limit, replace the
6466 // former with the latter. We have already handled the thread limit expression.
6467 NumThreadsVal = ThreadLimitVal;
6468 ThreadLimitVal = nullptr;
6469 } else {
6470 // Default to "0" which means runtime choice.
6471 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6472 NumThreadsVal = CGF.Builder.getInt32(0);
6473 }
6474
6475 // Handle if clause. If if clause present, the number of threads is
6476 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6477 if (CondVal) {
6479 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6480 CGF.Builder.getInt32(1));
6481 }
6482
6483 // If the thread limit and num threads expressions were present, take the
6484 // minimum.
6485 if (ThreadLimitVal) {
6486 NumThreadsVal = CGF.Builder.CreateSelect(
6487 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6488 ThreadLimitVal, NumThreadsVal);
6489 }
6490
6491 return NumThreadsVal;
6492}
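// Editorial walk-through (assumed directive, not from this file): for
//   #pragma omp target parallel num_threads(128) thread_limit(64) if(c)
// the code above first emits select(c, 128, 1) for the if clause and then
// select(64 < n, 64, n) for the clamp, so the offloaded region runs with
// min(64, 128) threads when 'c' holds and a single thread otherwise.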
6493
6494namespace {
6495LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6496
6497// Utility to handle information from clauses associated with a given
6498// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6499// It provides a convenient interface to obtain the information and generate
6500// code for that information.
6501class MappableExprsHandler {
6502public:
6503 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6504 static unsigned getFlagMemberOffset() {
6505 unsigned Offset = 0;
6506 for (uint64_t Remain =
6507 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6508 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6509 !(Remain & 1); Remain = Remain >> 1)
6510 Offset++;
6511 return Offset;
6512 }
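  // Editorial note: with the current encoding, where OMP_MAP_MEMBER_OF
  // occupies the top 16 bits of the 64-bit flag word (0xFFFF'0000'0000'0000),
  // the loop above counts 48 trailing zero bits and returns 48.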
6513
6514 /// Class that holds debugging information for a data mapping to be passed to
6515 /// the runtime library.
6516 class MappingExprInfo {
6517 /// The variable declaration used for the data mapping.
6518 const ValueDecl *MapDecl = nullptr;
6519 /// The original expression used in the map clause, or null if there is
6520 /// none.
6521 const Expr *MapExpr = nullptr;
6522
6523 public:
6524 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6525 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6526
6527 const ValueDecl *getMapDecl() const { return MapDecl; }
6528 const Expr *getMapExpr() const { return MapExpr; }
6529 };
6530
6531 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6532 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6533 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6534 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6535 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6536 using MapNonContiguousArrayTy =
6537 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6538 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6539 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6540
6541 /// This structure contains combined information generated for mappable
6542 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6543 /// mappers, and non-contiguous information.
6544 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6545 MapExprsArrayTy Exprs;
6546 MapValueDeclsArrayTy Mappers;
6547 MapValueDeclsArrayTy DevicePtrDecls;
6548
6549 /// Append arrays in \a CurInfo.
6550 void append(MapCombinedInfoTy &CurInfo) {
6551 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6552 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6553 CurInfo.DevicePtrDecls.end());
6554 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6555 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6556 }
6557 };
6558
6559 /// Map between a struct and its lowest & highest elements which have been
6560 /// mapped.
6561 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6562 /// HE(FieldIndex, Pointer)}
6563 struct StructRangeInfoTy {
6564 MapCombinedInfoTy PreliminaryMapData;
6565 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6566 0, Address::invalid()};
6567 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6568 0, Address::invalid()};
6569 Address Base = Address::invalid();
6570 Address LB = Address::invalid();
6571 bool IsArraySection = false;
6572 bool HasCompleteRecord = false;
6573 };
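  // Editorial example: mapping s.x and s.z of 'struct { int x, y, z; } s'
  // records LowestElem = {0, &s.x} and HighestElem = {2, &s.z}, so space is
  // later allocated for the contiguous range that also covers the unmapped
  // 'y'.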
6574
6575private:
6576 /// Kind that defines how a device pointer has to be returned.
6577 struct MapInfo {
6578 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6579 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6580 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6581 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6582 bool ReturnDevicePointer = false;
6583 bool IsImplicit = false;
6584 const ValueDecl *Mapper = nullptr;
6585 const Expr *VarRef = nullptr;
6586 bool ForDeviceAddr = false;
6587
6588 MapInfo() = default;
6589 MapInfo(
6590 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6591 OpenMPMapClauseKind MapType,
6592 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6593 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6594 bool ReturnDevicePointer, bool IsImplicit,
6595 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6596 bool ForDeviceAddr = false)
6597 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6598 MotionModifiers(MotionModifiers),
6599 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6600 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6601 };
6602
6603 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6604 /// member and there is no map information about it, then emission of that
6605 /// entry is deferred until the whole struct has been processed.
6606 struct DeferredDevicePtrEntryTy {
6607 const Expr *IE = nullptr;
6608 const ValueDecl *VD = nullptr;
6609 bool ForDeviceAddr = false;
6610
6611 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6612 bool ForDeviceAddr)
6613 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6614 };
6615
6616 /// The target directive from where the mappable clauses were extracted. It
6617 /// is either an executable directive or a user-defined mapper directive.
6618 llvm::PointerUnion<const OMPExecutableDirective *,
6619 const OMPDeclareMapperDecl *>
6620 CurDir;
6621
6622 /// Function the directive is being generated for.
6623 CodeGenFunction &CGF;
6624
6625 /// Set of all first private variables in the current directive.
6626 /// bool data is set to true if the variable is implicitly marked as
6627 /// firstprivate, false otherwise.
6628 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6629
6630 /// Map between device pointer declarations and their expression components.
6631 /// The key value for declarations in 'this' is null.
6632 llvm::DenseMap<
6633 const ValueDecl *,
6634 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6635 DevPointersMap;
6636
6637 /// Map between device addr declarations and their expression components.
6638 /// The key value for declarations in 'this' is null.
6639 llvm::DenseMap<
6640 const ValueDecl *,
6641 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6642 HasDevAddrsMap;
6643
6644 /// Map between lambda declarations and their map type.
6645 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6646
6647 llvm::Value *getExprTypeSize(const Expr *E) const {
6648 QualType ExprTy = E->getType().getCanonicalType();
6649
6650 // Calculate the size for array shaping expression.
6651 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6652 llvm::Value *Size =
6653 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6654 for (const Expr *SE : OAE->getDimensions()) {
6655 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6656 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6657 CGF.getContext().getSizeType(),
6658 SE->getExprLoc());
6659 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6660 }
6661 return Size;
6662 }
6663
6664 // Reference types are ignored for mapping purposes.
6665 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6666 ExprTy = RefTy->getPointeeType().getCanonicalType();
6667
6668 // Given that an array section is considered a built-in type, we need to
6669 // do the calculation based on the length of the section instead of relying
6670 // on CGF.getTypeSize(E->getType()).
6671 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6672 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6673 OAE->getBase()->IgnoreParenImpCasts())
6674 .getCanonicalType();
6675
6676 // If there is no length associated with the expression and lower bound is
6677 // not specified too, that means we are using the whole length of the
6678 // base.
6679 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6680 !OAE->getLowerBound())
6681 return CGF.getTypeSize(BaseTy);
6682
6683 llvm::Value *ElemSize;
6684 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6685 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6686 } else {
6687 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6688 assert(ATy && "Expecting array type if not a pointer type.");
6689 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6690 }
6691
6692 // If we don't have a length at this point, that is because we have an
6693 // array section with a single element.
6694 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6695 return ElemSize;
6696
6697 if (const Expr *LenExpr = OAE->getLength()) {
6698 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6699 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6700 CGF.getContext().getSizeType(),
6701 LenExpr->getExprLoc());
6702 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6703 }
6704 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6705 OAE->getLowerBound() && "expected array_section[lb:].");
6706 // Size = sizetype - lb * elemtype;
6707 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6708 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6709 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6710 CGF.getContext().getSizeType(),
6711 OAE->getLowerBound()->getExprLoc());
6712 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6713 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6714 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6715 LengthVal = CGF.Builder.CreateSelect(
6716 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6717 return LengthVal;
6718 }
6719 return CGF.getTypeSize(ExprTy);
6720 }
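  // Editorial example: for 'float *p', the section p[1:24] takes the length
  // branch above and yields 24 * sizeof(float); for 'int a[100]' mapped as
  // a[lb:], the final branch emits
  //   select(sizeof(a) > lb * sizeof(int), sizeof(a) - lb * sizeof(int), 0).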
6721
6722 /// Return the corresponding bits for a given map clause modifier. Add
6723 /// a flag marking the map as a pointer if requested. Add a flag marking the
6724 /// map as the first one of a series of maps that relate to the same map
6725 /// expression.
6726 OpenMPOffloadMappingFlags getMapTypeBits(
6727 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6728 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6729 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6730 OpenMPOffloadMappingFlags Bits =
6731 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6732 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6733 switch (MapType) {
6734 case OMPC_MAP_alloc:
6735 case OMPC_MAP_release:
6736 // alloc and release are the default behavior in the runtime library, i.e.
6737 // if we don't pass any bits for alloc/release, that is what the runtime
6738 // is going to do. Therefore, we don't need to signal anything for these
6739 // two type modifiers.
6740 break;
6741 case OMPC_MAP_to:
6742 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6743 break;
6744 case OMPC_MAP_from:
6745 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6746 break;
6747 case OMPC_MAP_tofrom:
6748 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6749 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6750 break;
6751 case OMPC_MAP_delete:
6752 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6753 break;
6754 case OMPC_MAP_unknown:
6755 llvm_unreachable("Unexpected map type!");
6756 }
6757 if (AddPtrFlag)
6758 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6759 if (AddIsTargetParamFlag)
6760 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6761 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6762 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6763 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6764 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6765 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6766 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6767 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6768 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6769 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6770 if (IsNonContiguous)
6771 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6772 return Bits;
6773 }
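  // Editorial example: map(always, close, tofrom: x) yields OMP_MAP_TO |
  // OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE; OMP_MAP_IMPLICIT is added
  // only for maps the compiler synthesizes rather than ones written by the
  // user.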
6774
6775 /// Return true if the provided expression is a final array section. A
6776 /// final array section is one whose length can't be proved to be one.
6777 bool isFinalArraySectionExpression(const Expr *E) const {
6778 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6779
6780 // It is not an array section and therefore not a unity-size one.
6781 if (!OASE)
6782 return false;
6783
6784 // An array section with no colon always refers to a single element.
6785 if (OASE->getColonLocFirst().isInvalid())
6786 return false;
6787
6788 const Expr *Length = OASE->getLength();
6789
6790 // If we don't have a length we have to check if the array has size 1
6791 // for this dimension. Also, we should always expect a length if the
6792 // base type is pointer.
6793 if (!Length) {
6794 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6795 OASE->getBase()->IgnoreParenImpCasts())
6796 .getCanonicalType();
6797 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6798 return ATy->getSExtSize() != 1;
6799 // If we don't have a constant dimension length, we have to consider
6800 // the current section as having any size, so it is not necessarily
6801 // unitary. If it happens to be unity size, that's the user's fault.
6802 return true;
6803 }
6804
6805 // Check if the length evaluates to 1.
6806 Expr::EvalResult Result;
6807 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6808 return true; // Can have size more than 1.
6809
6810 llvm::APSInt ConstLength = Result.Val.getInt();
6811 return ConstLength.getSExtValue() != 1;
6812 }
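  // Editorial example: for 'int a[8]' and a runtime value 'n', a[0:1] is not
  // final (constant length 1), a[0:n] is final (length not provably 1), and
  // a[3] is rejected by the first check since it is no array section at all.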
6813
6814 /// Generate the base pointers, section pointers, sizes, map type bits, and
6815 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6816 /// map type, map or motion modifiers, and expression components.
6817 /// \a IsFirstComponent should be set to true if the provided set of
6818 /// components is the first associated with a capture.
6819 void generateInfoForComponentList(
6820 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6821 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6822 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6823 MapCombinedInfoTy &CombinedInfo,
6824 MapCombinedInfoTy &StructBaseCombinedInfo,
6825 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6826 bool IsImplicit, bool GenerateAllInfoForClauses,
6827 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6828 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6829 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6830 OverlappedElements = {},
6831 bool AreBothBasePtrAndPteeMapped = false) const {
6832 // The following summarizes what has to be generated for each map and the
6833 // types below. The generated information is expressed in this order:
6834 // base pointer, section pointer, size, flags
6835 // (to add to the ones that come from the map type and modifier).
6836 //
6837 // double d;
6838 // int i[100];
6839 // float *p;
6840 // int **a = &i;
6841 //
6842 // struct S1 {
6843 // int i;
6844 // float f[50];
6845 // }
6846 // struct S2 {
6847 // int i;
6848 // float f[50];
6849 // S1 s;
6850 // double *p;
6851 // struct S2 *ps;
6852 // int &ref;
6853 // }
6854 // S2 s;
6855 // S2 *ps;
6856 //
6857 // map(d)
6858 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6859 //
6860 // map(i)
6861 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6862 //
6863 // map(i[1:23])
6864 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6865 //
6866 // map(p)
6867 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6868 //
6869 // map(p[1:24])
6870 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6871 // in unified shared memory mode or for local pointers
6872 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6873 //
6874 // map((*a)[0:3])
6875 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6876 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6877 //
6878 // map(**a)
6879 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6880 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6881 //
6882 // map(s)
6883 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6884 //
6885 // map(s.i)
6886 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6887 //
6888 // map(s.s.f)
6889 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6890 //
6891 // map(s.p)
6892 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6893 //
6894 // map(to: s.p[:22])
6895 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6896 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6897 // &(s.p), &(s.p[0]), 22*sizeof(double),
6898 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6899 // (*) alloc space for struct members, only this is a target parameter
6900 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6901 // optimizes this entry out, same in the examples below)
6902 // (***) map the pointee (map: to)
6903 //
6904 // map(to: s.ref)
6905 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6906 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6907 // (*) alloc space for struct members, only this is a target parameter
6908 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6909 // optimizes this entry out, same in the examples below)
6910 // (***) map the pointee (map: to)
6911 //
6912 // map(s.ps)
6913 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6914 //
6915 // map(from: s.ps->s.i)
6916 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6917 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6918 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6919 //
6920 // map(to: s.ps->ps)
6921 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6922 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6923 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6924 //
6925 // map(s.ps->ps->ps)
6926 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6927 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6928 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6929 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6930 //
6931 // map(to: s.ps->ps->s.f[:22])
6932 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6933 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6934 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6935 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6936 //
6937 // map(ps)
6938 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6939 //
6940 // map(ps->i)
6941 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6942 //
6943 // map(ps->s.f)
6944 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6945 //
6946 // map(from: ps->p)
6947 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6948 //
6949 // map(to: ps->p[:22])
6950 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6951 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6952 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6953 //
6954 // map(ps->ps)
6955 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6956 //
6957 // map(from: ps->ps->s.i)
6958 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6959 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6960 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6961 //
6962 // map(from: ps->ps->ps)
6963 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6964 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6965 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6966 //
6967 // map(ps->ps->ps->ps)
6968 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6969 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6970 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6971 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6972 //
6973 // map(to: ps->ps->ps->s.f[:22])
6974 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6975 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6976 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6977 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6978 //
6979 // map(to: s.f[:22]) map(from: s.p[:33])
6980 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6981 // sizeof(double*) (**), TARGET_PARAM
6982 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6983 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6984 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6985 // (*) allocate contiguous space needed to fit all mapped members even if
6986 // we allocate space for members not mapped (in this example,
6987 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
6988 // them as well because they fall between &s.f[0] and &s.p)
6989 //
6990 // map(from: s.f[:22]) map(to: ps->p[:33])
6991 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6992 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6993 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6994 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6995 // (*) the struct this entry pertains to is the 2nd element in the list of
6996 // arguments, hence MEMBER_OF(2)
6997 //
6998 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6999 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7000 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7001 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7002 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7003 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7004 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7005 // (*) the struct this entry pertains to is the 4th element in the list
7006 // of arguments, hence MEMBER_OF(4)
7007 //
7008 // map(p, p[:100])
7009 // ===> map(p[:100])
7010 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7011
7012 // Track if the map information being generated is the first for a capture.
7013 bool IsCaptureFirstInfo = IsFirstComponentList;
7014 // When the variable is on a declare target link or in a to clause with
7015 // unified memory, a reference is needed to hold the host/device address
7016 // of the variable.
7017 bool RequiresReference = false;
7018
7019 // Scan the components from the base to the complete expression.
7020 auto CI = Components.rbegin();
7021 auto CE = Components.rend();
7022 auto I = CI;
7023
7024 // Track if the map information being generated is the first for a list of
7025 // components.
7026 bool IsExpressionFirstInfo = true;
7027 bool FirstPointerInComplexData = false;
7028 Address BP = Address::invalid();
7029 const Expr *AssocExpr = I->getAssociatedExpression();
7030 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7031 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7032 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7033
7034 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7035 return;
7036 if (isa<MemberExpr>(AssocExpr)) {
7037 // The base is the 'this' pointer. The content of the pointer is going
7038 // to be the base of the field being mapped.
7039 BP = CGF.LoadCXXThisAddress();
7040 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7041 (OASE &&
7042 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7043 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7044 } else if (OAShE &&
7045 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7046 BP = Address(
7047 CGF.EmitScalarExpr(OAShE->getBase()),
7048 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7049 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7050 } else {
7051 // The base is the reference to the variable.
7052 // BP = &Var.
7053 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7054 if (const auto *VD =
7055 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7056 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7057 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7058 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7059 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7060 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7061 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7062 RequiresReference = true;
7063 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7064 }
7065 }
7066 }
7067
7068 // If the variable is a pointer and is being dereferenced (i.e. is not
7069 // the last component), the base has to be the pointer itself, not its
7070 // reference. References are ignored for mapping purposes.
7071 QualType Ty =
7072 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7073 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7074 // No need to generate individual map information for the pointer, it
7075 // can be associated with the combined storage if shared memory mode is
7076 // active or the base declaration is not global variable.
7077 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7078 if (!AreBothBasePtrAndPteeMapped &&
7079 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7080 !VD || VD->hasLocalStorage()))
7081 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7082 else
7083 FirstPointerInComplexData = true;
7084 ++I;
7085 }
7086 }
7087
7088 // Track whether a component of the list should be marked as MEMBER_OF some
7089 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7090 // in a component list should be marked as MEMBER_OF, all subsequent entries
7091 // do not belong to the base struct. E.g.
7092 // struct S2 s;
7093 // s.ps->ps->ps->f[:]
7094 // (1) (2) (3) (4)
7095 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7096 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7097 // is the pointee of ps(2) which is not member of struct s, so it should not
7098 // be marked as such (it is still PTR_AND_OBJ).
7099 // The variable is initialized to false so that PTR_AND_OBJ entries which
7100 // are not struct members are not considered (e.g. array of pointers to
7101 // data).
7102 bool ShouldBeMemberOf = false;
7103
7104 // Variable keeping track of whether or not we have encountered a component
7105 // in the component list which is a member expression. Useful when we have a
7106 // pointer or a final array section, in which case it is the previous
7107 // component in the list which tells us whether we have a member expression.
7108 // E.g. X.f[:]
7109 // While processing the final array section "[:]" it is "f" which tells us
7110 // whether we are dealing with a member of a declared struct.
7111 const MemberExpr *EncounteredME = nullptr;
7112
7113 // Track the total number of dimensions. Start from one for the dummy
7114 // dimension.
7115 uint64_t DimSize = 1;
7116
7117 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7118 bool IsPrevMemberReference = false;
7119
7120 bool IsPartialMapped =
7121 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7122
7123 // We need to check if we will be encountering any MEs. If we do not
7124 // encounter any ME expression it means we will be mapping the whole struct.
7125 // In that case we need to skip adding an entry for the struct to the
7126 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7127 // list only when generating all info for clauses.
7128 bool IsMappingWholeStruct = true;
7129 if (!GenerateAllInfoForClauses) {
7130 IsMappingWholeStruct = false;
7131 } else {
7132 for (auto TempI = I; TempI != CE; ++TempI) {
7133 const MemberExpr *PossibleME =
7134 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7135 if (PossibleME) {
7136 IsMappingWholeStruct = false;
7137 break;
7138 }
7139 }
7140 }
7141
7142 for (; I != CE; ++I) {
7143 // If the current component is member of a struct (parent struct) mark it.
7144 if (!EncounteredME) {
7145 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7146 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7147 // as MEMBER_OF the parent struct.
7148 if (EncounteredME) {
7149 ShouldBeMemberOf = true;
7150 // Do not emit as complex pointer if this is actually not array-like
7151 // expression.
7152 if (FirstPointerInComplexData) {
7153 QualType Ty = std::prev(I)
7154 ->getAssociatedDeclaration()
7155 ->getType()
7156 .getNonReferenceType();
7157 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7158 FirstPointerInComplexData = false;
7159 }
7160 }
7161 }
7162
7163 auto Next = std::next(I);
7164
7165 // We need to generate the addresses and sizes if this is the last
7166 // component, if the component is a pointer or if it is an array section
7167 // whose length can't be proved to be one. If this is a pointer, it
7168 // becomes the base address for the following components.
7169
7170 // A final array section, is one whose length can't be proved to be one.
7171 // If the map item is non-contiguous then we don't treat any array section
7172 // as final array section.
7173 bool IsFinalArraySection =
7174 !IsNonContiguous &&
7175 isFinalArraySectionExpression(I->getAssociatedExpression());
7176
7177 // If we have a declaration for the mapping use that, otherwise use
7178 // the base declaration of the map clause.
7179 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7180 ? I->getAssociatedDeclaration()
7181 : BaseDecl;
7182 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7183 : MapExpr;
7184
7185 // Get information on whether the element is a pointer. Have to do a
7186 // special treatment for array sections given that they are built-in
7187 // types.
7188 const auto *OASE =
7189 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7190 const auto *OAShE =
7191 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7192 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7193 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7194 bool IsPointer =
7195 OAShE ||
7196 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7197 .getCanonicalType()
7198 ->isAnyPointerType()) ||
7199 I->getAssociatedExpression()->getType()->isAnyPointerType();
7200 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7201 MapDecl &&
7202 MapDecl->getType()->isLValueReferenceType();
7203 bool IsNonDerefPointer = IsPointer &&
7204 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7205 !IsNonContiguous;
7206
7207 if (OASE)
7208 ++DimSize;
7209
7210 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7211 IsFinalArraySection) {
7212 // If this is not the last component, we expect the pointer to be
7213 // associated with an array expression or member expression.
7214 assert((Next == CE ||
7215 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7216 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7217 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7218 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7219 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7220 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7221 "Unexpected expression");
7222
7223 Address LB = Address::invalid();
7224 Address LowestElem = Address::invalid();
7225 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7226 const MemberExpr *E) {
7227 const Expr *BaseExpr = E->getBase();
7228 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7229 // scalar.
7230 LValue BaseLV;
7231 if (E->isArrow()) {
7232 LValueBaseInfo BaseInfo;
7233 TBAAAccessInfo TBAAInfo;
7234 Address Addr =
7235 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7236 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7237 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7238 } else {
7239 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7240 }
7241 return BaseLV;
7242 };
7243 if (OAShE) {
7244 LowestElem = LB =
7245 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7246 CGF.ConvertTypeForMem(
7247 OAShE->getBase()->getType()->getPointeeType()),
7248 CGF.getContext().getTypeAlignInChars(
7249 OAShE->getBase()->getType()));
7250 } else if (IsMemberReference) {
7251 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7252 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7253 LowestElem = CGF.EmitLValueForFieldInitialization(
7254 BaseLVal, cast<FieldDecl>(MapDecl))
7255 .getAddress();
7256 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7257 .getAddress();
7258 } else {
7259 LowestElem = LB =
7260 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7261 .getAddress();
7262 }
7263
7264 // If this component is a pointer inside the base struct then we don't
7265 // need to create any entry for it - it will be combined with the object
7266 // it is pointing to into a single PTR_AND_OBJ entry.
7267 bool IsMemberPointerOrAddr =
7268 EncounteredME &&
7269 (((IsPointer || ForDeviceAddr) &&
7270 I->getAssociatedExpression() == EncounteredME) ||
7271 (IsPrevMemberReference && !IsPointer) ||
7272 (IsMemberReference && Next != CE &&
7273 !Next->getAssociatedExpression()->getType()->isPointerType()));
7274 if (!OverlappedElements.empty() && Next == CE) {
7275 // Handle base element with the info for overlapped elements.
7276 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7277 assert(!IsPointer &&
7278 "Unexpected base element with the pointer type.");
7279 // Mark the whole struct as the struct that requires allocation on the
7280 // device.
7281 PartialStruct.LowestElem = {0, LowestElem};
7282 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7283 I->getAssociatedExpression()->getType());
7284 Address HB = CGF.Builder.CreateConstGEP(
7285 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7286 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7287 TypeSize.getQuantity() - 1);
7288 PartialStruct.HighestElem = {
7289 std::numeric_limits<decltype(
7290 PartialStruct.HighestElem.first)>::max(),
7291 HB};
7292 PartialStruct.Base = BP;
7293 PartialStruct.LB = LB;
7294 assert(
7295 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7296 "Overlapped elements must be used only once for the variable.");
7297 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7298 // Emit data for non-overlapped data.
7299 OpenMPOffloadMappingFlags Flags =
7300 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7301 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7302 /*AddPtrFlag=*/false,
7303 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7304 llvm::Value *Size = nullptr;
7305 // Do bitcopy of all non-overlapped structure elements.
7306 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7307 Component : OverlappedElements) {
7308 Address ComponentLB = Address::invalid();
7309 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7310 Component) {
7311 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7312 const auto *FD = dyn_cast<FieldDecl>(VD);
7313 if (FD && FD->getType()->isLValueReferenceType()) {
7314 const auto *ME =
7315 cast<MemberExpr>(MC.getAssociatedExpression());
7316 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7317 ComponentLB =
7318 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7319 .getAddress();
7320 } else {
7321 ComponentLB =
7322 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7323 .getAddress();
7324 }
7325 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7326 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7327 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7328 LBPtr);
7329 break;
7330 }
7331 }
7332 assert(Size && "Failed to determine structure size");
7333 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7334 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7335 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7336 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7337 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7338 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7339 Size, CGF.Int64Ty, /*isSigned=*/true));
7340 CombinedInfo.Types.push_back(Flags);
7341 CombinedInfo.Mappers.push_back(nullptr);
7342 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7343 : 1);
7344 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7345 }
7346 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7347 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7348 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7349 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7350 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7351 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7352 Size = CGF.Builder.CreatePtrDiff(
7353 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7354 LBPtr);
7355 CombinedInfo.Sizes.push_back(
7356 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7357 CombinedInfo.Types.push_back(Flags);
7358 CombinedInfo.Mappers.push_back(nullptr);
7359 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7360 : 1);
7361 break;
7362 }
7363 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7364 // Skip adding an entry in the CurInfo of this combined entry if the
7365 // whole struct is currently being mapped. The struct needs to be added
7366 // in the first position before any data internal to the struct is being
7367 // mapped.
7368 // Skip adding an entry in the CurInfo of this combined entry if the
7369 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
7370 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7371 (Next == CE && MapType != OMPC_MAP_unknown)) {
7372 if (!IsMappingWholeStruct) {
7373 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7374 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7375 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7376 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7377 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7378 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7379 Size, CGF.Int64Ty, /*isSigned=*/true));
7380 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7381 : 1);
7382 } else {
7383 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7384 StructBaseCombinedInfo.BasePointers.push_back(
7385 BP.emitRawPointer(CGF));
7386 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7387 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7388 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7389 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7390 Size, CGF.Int64Ty, /*isSigned=*/true));
7391 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7392 IsNonContiguous ? DimSize : 1);
7393 }
7394
7395 // If Mapper is valid, the last component inherits the mapper.
7396 bool HasMapper = Mapper && Next == CE;
7397 if (!IsMappingWholeStruct)
7398 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7399 else
7400 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7401 : nullptr);
7402
7403 // We need to add a pointer flag for each map that comes from the
7404 // same expression except for the first one. We also need to signal
7405 // this map is the first one that relates with the current capture
7406 // (there is a set of entries for each capture).
7407 OpenMPOffloadMappingFlags Flags =
7408 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7409 !IsExpressionFirstInfo || RequiresReference ||
7410 FirstPointerInComplexData || IsMemberReference,
7411 AreBothBasePtrAndPteeMapped ||
7412 (IsCaptureFirstInfo && !RequiresReference),
7413 IsNonContiguous);
7414
7415 if (!IsExpressionFirstInfo || IsMemberReference) {
7416 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7417 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7418 if (IsPointer || (IsMemberReference && Next != CE))
7419 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7420 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7421 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7422 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7423 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7424
7425 if (ShouldBeMemberOf) {
7426 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7427 // should be later updated with the correct value of MEMBER_OF.
7428 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7429 // From now on, all subsequent PTR_AND_OBJ entries should not be
7430 // marked as MEMBER_OF.
7431 ShouldBeMemberOf = false;
7432 }
7433 }
7434
7435 if (!IsMappingWholeStruct)
7436 CombinedInfo.Types.push_back(Flags);
7437 else
7438 StructBaseCombinedInfo.Types.push_back(Flags);
7439 }
7440
7441 // If we have encountered a member expression so far, keep track of the
7442 // mapped member. If the parent is "*this", then the value declaration
7443 // is nullptr.
7444 if (EncounteredME) {
7445 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7446 unsigned FieldIndex = FD->getFieldIndex();
7447
7448 // Update info about the lowest and highest elements for this struct
7449 if (!PartialStruct.Base.isValid()) {
7450 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7451 if (IsFinalArraySection) {
7452 Address HB =
7453 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7454 .getAddress();
7455 PartialStruct.HighestElem = {FieldIndex, HB};
7456 } else {
7457 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7458 }
7459 PartialStruct.Base = BP;
7460 PartialStruct.LB = BP;
7461 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7462 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7463 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7464 if (IsFinalArraySection) {
7465 Address HB =
7466 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7467 .getAddress();
7468 PartialStruct.HighestElem = {FieldIndex, HB};
7469 } else {
7470 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7471 }
7472 }
7473 }
7474
7475 // We need to emit a combined struct for array sections.
7476 if (IsFinalArraySection || IsNonContiguous)
7477 PartialStruct.IsArraySection = true;
7478
7479 // If we have a final array section, we are done with this expression.
7480 if (IsFinalArraySection)
7481 break;
7482
7483 // The pointer becomes the base for the next element.
7484 if (Next != CE)
7485 BP = IsMemberReference ? LowestElem : LB;
7486 if (!IsPartialMapped)
7487 IsExpressionFirstInfo = false;
7488 IsCaptureFirstInfo = false;
7489 FirstPointerInComplexData = false;
7490 IsPrevMemberReference = IsMemberReference;
7491 } else if (FirstPointerInComplexData) {
7492 QualType Ty = Components.rbegin()
7493 ->getAssociatedDeclaration()
7494 ->getType()
7495 .getNonReferenceType();
7496 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7497 FirstPointerInComplexData = false;
7498 }
7499 }
7500 // If we ran into the whole component, allocate space for the whole
7501 // record.
7502 if (!EncounteredME)
7503 PartialStruct.HasCompleteRecord = true;
7504
7505 if (!IsNonContiguous)
7506 return;
7507
7508 const ASTContext &Context = CGF.getContext();
7509
7510 // To support strides in array sections, we need to initialize the first
7511 // dimension size as 1, the first offset as 0, and the first count as 1.
7512 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7513 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7514 MapValuesArrayTy CurStrides;
7515 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7516 uint64_t ElementTypeSize;
7517
7518 // Collect size information for each dimension and get the element size as
7519 // the first stride. For example, for `int arr[10][10]`, the DimSizes
7520 // should be [10, 10] and the first stride is 4 bytes.
7521 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7522 Components) {
7523 const Expr *AssocExpr = Component.getAssociatedExpression();
7524 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7525
7526 if (!OASE)
7527 continue;
7528
7529 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7530 auto *CAT = Context.getAsConstantArrayType(Ty);
7531 auto *VAT = Context.getAsVariableArrayType(Ty);
7532
7533 // We need all the dimension sizes except for the last one.
7534 assert((VAT || CAT || &Component == &*Components.begin()) &&
7535 "Should be either ConstantArray or VariableArray if not the "
7536 "first Component");
7537
7538 // Get element size if CurStrides is empty.
7539 if (CurStrides.empty()) {
7540 const Type *ElementType = nullptr;
7541 if (CAT)
7542 ElementType = CAT->getElementType().getTypePtr();
7543 else if (VAT)
7544 ElementType = VAT->getElementType().getTypePtr();
7545 else
7546 assert(&Component == &*Components.begin() &&
7547 "Only expect pointer (non CAT or VAT) when this is the "
7548 "first Component");
7549 // If ElementType is null, it means the base is a pointer
7550 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7551 // on the next iteration.
7552 if (ElementType) {
7553 // When the base is a pointer, we need to remove one
7554 // level of indirection.
7555 if (&Component != &*Components.begin())
7556 ElementType = ElementType->getPointeeOrArrayElementType();
7557 ElementTypeSize =
7558 Context.getTypeSizeInChars(ElementType).getQuantity();
7559 CurStrides.push_back(
7560 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7561 }
7562 }
7563 // Get the dimension value for every dimension except the last, since
7564 // we don't need it.
7565 if (DimSizes.size() < Components.size() - 1) {
7566 if (CAT)
7567 DimSizes.push_back(
7568 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7569 else if (VAT)
7570 DimSizes.push_back(CGF.Builder.CreateIntCast(
7571 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7572 /*IsSigned=*/false));
7573 }
7574 }
7575
7576 // Skip the dummy dimension since we already have its information.
7577 auto *DI = DimSizes.begin() + 1;
7578 // Running product of the dimension sizes.
7579 llvm::Value *DimProd =
7580 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7581
7582 // Collect info for non-contiguous maps. Notice that offset, count, and
7583 // stride are only meaningful for an array section, so we insert a null
7584 // for anything other than an array section.
7585 // Also, the sizes of the offset, count, and stride lists are not the
7586 // same as those of pointers, base_pointers, sizes, or dims. Instead,
7587 // they match the number of non-contiguous declarations in the target
7588 // update to/from clause.
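// Sketch (illustrative, not from the source): a clause such as
//   #pragma omp target update to(arr[0:2:2][1:3])
// contributes exactly one offsets/counts/strides triple here, however
// many entries it adds to the pointers/sizes arrays.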
7589 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7590 Components) {
7591 const Expr *AssocExpr = Component.getAssociatedExpression();
7592
7593 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7594 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7595 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7596 /*isSigned=*/false);
7597 CurOffsets.push_back(Offset);
7598 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7599 CurStrides.push_back(CurStrides.back());
7600 continue;
7601 }
7602
7603 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7604
7605 if (!OASE)
7606 continue;
7607
7608 // Offset
7609 const Expr *OffsetExpr = OASE->getLowerBound();
7610 llvm::Value *Offset = nullptr;
7611 if (!OffsetExpr) {
7612 // If offset is absent, then we just set it to zero.
7613 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7614 } else {
7615 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7616 CGF.Int64Ty,
7617 /*isSigned=*/false);
7618 }
7619 CurOffsets.push_back(Offset);
7620
7621 // Count
7622 const Expr *CountExpr = OASE->getLength();
7623 llvm::Value *Count = nullptr;
7624 if (!CountExpr) {
7625 // In Clang, once a higher dimension is an array section, all the
7626 // lower dimensions are constructed as array sections as well. However,
7627 // for a case like arr[0:2][2], Clang constructs the inner dimension as
7628 // an array section even though it is not one according to the spec.
7629 if (!OASE->getColonLocFirst().isValid() &&
7630 !OASE->getColonLocSecond().isValid()) {
7631 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7632 } else {
7633 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7634 // When the length is absent it defaults to ⌈(size −
7635 // lower-bound)/stride⌉, where size is the size of the array
7636 // dimension.
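// Worked example (illustrative): for a dimension of size 10, a[2::2] gets
// the default length (10 - 2) / 2 = 4, and a[2:] with no stride gets
// 10 - 2 = 8, matching the subtraction/division emitted below.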
7637 const Expr *StrideExpr = OASE->getStride();
7638 llvm::Value *Stride =
7639 StrideExpr
7640 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7641 CGF.Int64Ty, /*isSigned=*/false)
7642 : nullptr;
7643 if (Stride)
7644 Count = CGF.Builder.CreateUDiv(
7645 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7646 else
7647 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7648 }
7649 } else {
7650 Count = CGF.EmitScalarExpr(CountExpr);
7651 }
7652 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7653 CurCounts.push_back(Count);
7654
7655 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7656 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7657 // Offset Count Stride
7658 // D0 0 1 4 (int) <- dummy dimension
7659 // D1 0 2 8 (2 * (1) * 4)
7660 // D2 1 2 20 (1 * (1 * 5) * 4)
7661 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7662 const Expr *StrideExpr = OASE->getStride();
7663 llvm::Value *Stride =
7664 StrideExpr
7665 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7666 CGF.Int64Ty, /*isSigned=*/false)
7667 : nullptr;
7668 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7669 if (Stride)
7670 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7671 else
7672 CurStrides.push_back(DimProd);
7673 if (DI != DimSizes.end())
7674 ++DI;
7675 }
7676
7677 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7678 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7679 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7680 }
7681
7682 /// Return the adjusted map modifiers if the declaration a capture refers to
7683 /// appears in a first-private clause. This is expected to be used only with
7684 /// directives that start with 'target'.
7685 OpenMPOffloadMappingFlags
7686 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7687 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7688
7689 // A firstprivate variable captured by reference will use only the
7690 // 'private ptr' and 'map to' flags. Return the right flags if the
7691 // captured declaration is known to be firstprivate in this handler.
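// Sketch (illustrative): given
//   int *p;
//   #pragma omp target firstprivate(p)
// the capture of 'p' takes the TO | PTR_AND_OBJ branch below, while a
// non-pointer firstprivate capture takes PRIVATE | TO.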
7692 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7693 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7694 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7695 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7696 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7697 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7698 }
7699 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7700 if (I != LambdasMap.end())
7701 // For map(to: lambda): use the user-specified map type.
7702 return getMapTypeBits(
7703 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7704 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7705 /*AddPtrFlag=*/false,
7706 /*AddIsTargetParamFlag=*/false,
7707 /*isNonContiguous=*/false);
7708 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7709 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7710 }
7711
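/// Flatten a record into the sequence of its non-empty fields, recursing
/// into non-virtual and virtual bases. A minimal sketch of the effect on
/// hypothetical types: for
///   struct A { int a; }; struct B : A { int b; };
/// getPlainLayout(B, Layout, /*AsBase=*/false) appends A::a and then B::b,
/// following the LLVM field order of the record layout.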
7712 void getPlainLayout(const CXXRecordDecl *RD,
7713 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7714 bool AsBase) const {
7715 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7716
7717 llvm::StructType *St =
7718 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7719
7720 unsigned NumElements = St->getNumElements();
7721 llvm::SmallVector<
7722 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7723 RecordLayout(NumElements);
7724
7725 // Fill bases.
7726 for (const auto &I : RD->bases()) {
7727 if (I.isVirtual())
7728 continue;
7729
7730 QualType BaseTy = I.getType();
7731 const auto *Base = BaseTy->getAsCXXRecordDecl();
7732 // Ignore empty bases.
7733 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7734 CGF.getContext()
7735 .getASTRecordLayout(RD)
7736 .getBaseClassOffset(Base)
7737 .isZero())
7738 continue;
7739
7740 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7741 RecordLayout[FieldIndex] = Base;
7742 }
7743 // Fill in virtual bases.
7744 for (const auto &I : RD->vbases()) {
7745 QualType BaseTy = I.getType();
7746 // Ignore empty bases.
7747 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7748 continue;
7749
7750 const auto *Base = BaseTy->getAsCXXRecordDecl();
7751 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7752 if (RecordLayout[FieldIndex])
7753 continue;
7754 RecordLayout[FieldIndex] = Base;
7755 }
7756 // Fill in all the fields.
7757 assert(!RD->isUnion() && "Unexpected union.");
7758 for (const auto *Field : RD->fields()) {
7759 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7760 // will fill in later.)
7761 if (!Field->isBitField() &&
7762 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7763 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7764 RecordLayout[FieldIndex] = Field;
7765 }
7766 }
7767 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7768 &Data : RecordLayout) {
7769 if (Data.isNull())
7770 continue;
7771 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7772 getPlainLayout(Base, Layout, /*AsBase=*/true);
7773 else
7774 Layout.push_back(cast<const FieldDecl *>(Data));
7775 }
7776 }
7777
7778 /// Generate all the base pointers, section pointers, sizes, map types, and
7779 /// mappers for the extracted mappable expressions (all included in \a
7780 /// CombinedInfo). Also, for each item that relates to a device pointer, a
7781 /// pair of the relevant declaration and index where it occurs is appended to
7782 /// the device pointers info array.
7783 void generateAllInfoForClauses(
7784 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7785 llvm::OpenMPIRBuilder &OMPBuilder,
7786 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7787 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7788 // We have to process the component lists that relate to the same
7789 // declaration in a single chunk so that we can generate the map flags
7790 // correctly. Therefore, we organize all lists in a map.
7791 enum MapKind { Present, Allocs, Other, Total };
7792 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7793 SmallVector<SmallVector<MapInfo, 8>, 4>>
7794 Info;
7795
7796 // Helper function to fill the information map for the different supported
7797 // clauses.
7798 auto &&InfoGen =
7799 [&Info, &SkipVarSet](
7800 const ValueDecl *D, MapKind Kind,
7801 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7802 OpenMPMapClauseKind MapType,
7803 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7804 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7805 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7806 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7807 if (SkipVarSet.contains(D))
7808 return;
7809 auto It = Info.try_emplace(D, Total).first;
7810 It->second[Kind].emplace_back(
7811 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7812 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7813 };
7814
7815 for (const auto *Cl : Clauses) {
7816 const auto *C = dyn_cast<OMPMapClause>(Cl);
7817 if (!C)
7818 continue;
7819 MapKind Kind = Other;
7820 if (llvm::is_contained(C->getMapTypeModifiers(),
7821 OMPC_MAP_MODIFIER_present))
7822 Kind = Present;
7823 else if (C->getMapType() == OMPC_MAP_alloc)
7824 Kind = Allocs;
7825 const auto *EI = C->getVarRefs().begin();
7826 for (const auto L : C->component_lists()) {
7827 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7828 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7829 C->getMapTypeModifiers(), {},
7830 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7831 E);
7832 ++EI;
7833 }
7834 }
7835 for (const auto *Cl : Clauses) {
7836 const auto *C = dyn_cast<OMPToClause>(Cl);
7837 if (!C)
7838 continue;
7839 MapKind Kind = Other;
7840 if (llvm::is_contained(C->getMotionModifiers(),
7841 OMPC_MOTION_MODIFIER_present))
7842 Kind = Present;
7843 const auto *EI = C->getVarRefs().begin();
7844 for (const auto L : C->component_lists()) {
7845 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
7846 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7847 C->isImplicit(), std::get<2>(L), *EI);
7848 ++EI;
7849 }
7850 }
7851 for (const auto *Cl : Clauses) {
7852 const auto *C = dyn_cast<OMPFromClause>(Cl);
7853 if (!C)
7854 continue;
7855 MapKind Kind = Other;
7856 if (llvm::is_contained(C->getMotionModifiers(),
7857 OMPC_MOTION_MODIFIER_present))
7858 Kind = Present;
7859 const auto *EI = C->getVarRefs().begin();
7860 for (const auto L : C->component_lists()) {
7861 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
7862 C->getMotionModifiers(),
7863 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7864 *EI);
7865 ++EI;
7866 }
7867 }
7868
7869 // Look at the use_device_ptr and use_device_addr clause information and
7870 // mark the existing map entries as such. If there is no map information for
7871 // an entry in the use_device_ptr and use_device_addr list, we create one
7872 // with map type 'alloc' and a zero-size section. It is the user's fault if
7873 // that was not mapped before. If there is no map information and the pointer
7874 // is a struct member, then we defer the emission of that entry until the
7875 // whole struct has been processed.
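// Sketch (illustrative): for
//   struct S { int *p; } s;
//   #pragma omp target data map(s) use_device_ptr(s.p)
// the entry for s.p lands in DeferredInfo below and is only emitted once
// the enclosing struct 's' has been fully processed.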
7876 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7877 SmallVector<DeferredDevicePtrEntryTy, 4>>
7878 DeferredInfo;
7879 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7880
7881 auto &&UseDeviceDataCombinedInfoGen =
7882 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7883 CodeGenFunction &CGF, bool IsDevAddr) {
7884 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7885 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7886 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7887 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7888 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7889 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7890 UseDeviceDataCombinedInfo.Sizes.push_back(
7891 llvm::Constant::getNullValue(CGF.Int64Ty));
7892 UseDeviceDataCombinedInfo.Types.push_back(
7893 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7894 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7895 };
7896
7897 auto &&MapInfoGen =
7898 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7899 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7900 OMPClauseMappableExprCommon::MappableExprComponentListRef
7901 Components,
7902 bool IsImplicit, bool IsDevAddr) {
7903 // We didn't find any match in our map information, so we generate a
7904 // zero-size array section. If the pointer is a struct member, we defer
7905 // this action until the whole struct has been processed.
7906 if (isa<MemberExpr>(IE)) {
7907 // Insert the pointer into Info to be processed by
7908 // generateInfoForComponentList. Because it is a member pointer
7909 // without a pointee, no entry will be generated for it, therefore
7910 // we need to generate one after the whole struct has been
7911 // processed. Nonetheless, generateInfoForComponentList must be
7912 // called to take the pointer into account for the calculation of
7913 // the range of the partial struct.
7914 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
7915 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
7916 IsDevAddr);
7917 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7918 } else {
7919 llvm::Value *Ptr;
7920 if (IsDevAddr) {
7921 if (IE->isGLValue())
7922 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7923 else
7924 Ptr = CGF.EmitScalarExpr(IE);
7925 } else {
7926 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7927 }
7928 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7929 }
7930 };
7931
7932 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7933 const Expr *IE, bool IsDevAddr) -> bool {
7934 // We potentially have map information for this declaration already.
7935 // Look for the first set of components that refer to it. If found,
7936 // return true.
7937 // If the first component is a member expression, we have to look into
7938 // 'this', which maps to null in the map of map information. Otherwise
7939 // look directly for the information.
7940 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7941 if (It != Info.end()) {
7942 bool Found = false;
7943 for (auto &Data : It->second) {
7944 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7945 return MI.Components.back().getAssociatedDeclaration() == VD;
7946 });
7947 // If we found a map entry, signal that the pointer has to be
7948 // returned and move on to the next declaration. Exclude cases where
7949 // the base pointer is mapped as array subscript, array section or
7950 // array shaping. The base address is passed as a pointer to base in
7951 // this case and cannot be used as a base for use_device_ptr list
7952 // item.
7953 if (CI != Data.end()) {
7954 if (IsDevAddr) {
7955 CI->ForDeviceAddr = IsDevAddr;
7956 CI->ReturnDevicePointer = true;
7957 Found = true;
7958 break;
7959 } else {
7960 auto PrevCI = std::next(CI->Components.rbegin());
7961 const auto *VarD = dyn_cast<VarDecl>(VD);
7962 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7963 isa<MemberExpr>(IE) ||
7964 !VD->getType().getNonReferenceType()->isPointerType() ||
7965 PrevCI == CI->Components.rend() ||
7966 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7967 VarD->hasLocalStorage()) {
7968 CI->ForDeviceAddr = IsDevAddr;
7969 CI->ReturnDevicePointer = true;
7970 Found = true;
7971 break;
7972 }
7973 }
7974 }
7975 }
7976 return Found;
7977 }
7978 return false;
7979 };
7980
7981 // Look at the use_device_ptr clause information and mark the existing map
7982 // entries as such. If there is no map information for an entry in the
7983 // use_device_ptr list, we create one with map type 'alloc' and zero size
7984 section. It is the user's fault if that was not mapped before. If there is
7985 // no map information and the pointer is a struct member, then we defer the
7986 // emission of that entry until the whole struct has been processed.
7987 for (const auto *Cl : Clauses) {
7988 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7989 if (!C)
7990 continue;
7991 for (const auto L : C->component_lists()) {
7992 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7993 std::get<1>(L);
7994 assert(!Components.empty() &&
7995 "Not expecting empty list of components!");
7996 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7997 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7998 const Expr *IE = Components.back().getAssociatedExpression();
7999 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8000 continue;
8001 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8002 /*IsDevAddr=*/false);
8003 }
8004 }
8005
8006 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8007 for (const auto *Cl : Clauses) {
8008 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8009 if (!C)
8010 continue;
8011 for (const auto L : C->component_lists()) {
8012 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8013 std::get<1>(L);
8014 assert(!std::get<1>(L).empty() &&
8015 "Not expecting empty list of components!");
8016 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8017 if (!Processed.insert(VD).second)
8018 continue;
8019 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8020 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8021 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8022 continue;
8023 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8024 /*IsDevAddr=*/true);
8025 }
8026 }
8027
8028 for (const auto &Data : Info) {
8029 StructRangeInfoTy PartialStruct;
8030 // Current struct information:
8031 MapCombinedInfoTy CurInfo;
8032 // Current struct base information:
8033 MapCombinedInfoTy StructBaseCurInfo;
8034 const Decl *D = Data.first;
8035 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8036 bool HasMapBasePtr = false;
8037 bool HasMapArraySec = false;
8038 if (VD && VD->getType()->isAnyPointerType()) {
8039 for (const auto &M : Data.second) {
8040 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8041 return isa_and_present<DeclRefExpr>(L.VarRef);
8042 });
8043 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8044 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8045 L.VarRef);
8046 });
8047 if (HasMapBasePtr && HasMapArraySec)
8048 break;
8049 }
8050 }
8051 for (const auto &M : Data.second) {
8052 for (const MapInfo &L : M) {
8053 assert(!L.Components.empty() &&
8054 "Not expecting declaration with no component lists.");
8055
8056 // Remember the current base pointer index.
8057 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8058 unsigned StructBasePointersIdx =
8059 StructBaseCurInfo.BasePointers.size();
8060 CurInfo.NonContigInfo.IsNonContiguous =
8061 L.Components.back().isNonContiguous();
8062 generateInfoForComponentList(
8063 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8064 CurInfo, StructBaseCurInfo, PartialStruct,
8065 /*IsFirstComponentList=*/false, L.IsImplicit,
8066 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8067 L.VarRef, /*OverlappedElements*/ {},
8068 HasMapBasePtr && HasMapArraySec);
8069
8070 // If this entry relates to a device pointer, set the relevant
8071 // declaration and add the 'return pointer' flag.
8072 if (L.ReturnDevicePointer) {
8073 // Check whether a value was added to either CurInfo or
8074 // StructBaseCurInfo and error if no value was added to either of
8075 // them:
8076 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8077 StructBasePointersIdx <
8078 StructBaseCurInfo.BasePointers.size()) &&
8079 "Unexpected number of mapped base pointers.");
8080
8081 // Choose a base pointer index which is always valid:
8082 const ValueDecl *RelevantVD =
8083 L.Components.back().getAssociatedDeclaration();
8084 assert(RelevantVD &&
8085 "No relevant declaration related with device pointer??");
8086
8087 // If StructBaseCurInfo has been updated this iteration then work on
8088 // the first new entry added to it i.e. make sure that when multiple
8089 // values are added to any of the lists, the first value added is
8090 // being modified by the assignments below (not the last value
8091 // added).
8092 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8093 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8094 RelevantVD;
8095 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8096 L.ForDeviceAddr ? DeviceInfoTy::Address
8097 : DeviceInfoTy::Pointer;
8098 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8099 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8100 } else {
8101 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8102 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8103 L.ForDeviceAddr ? DeviceInfoTy::Address
8104 : DeviceInfoTy::Pointer;
8105 CurInfo.Types[CurrentBasePointersIdx] |=
8106 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8107 }
8108 }
8109 }
8110 }
8111
8112 // Append any pending zero-length pointers which are struct members and
8113 // used with use_device_ptr or use_device_addr.
8114 auto CI = DeferredInfo.find(Data.first);
8115 if (CI != DeferredInfo.end()) {
8116 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8117 llvm::Value *BasePtr;
8118 llvm::Value *Ptr;
8119 if (L.ForDeviceAddr) {
8120 if (L.IE->isGLValue())
8121 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8122 else
8123 Ptr = this->CGF.EmitScalarExpr(L.IE);
8124 BasePtr = Ptr;
8125 // Entry is RETURN_PARAM. Also, set the placeholder value
8126 // MEMBER_OF=FFFF so that the entry is later updated with the
8127 // correct value of MEMBER_OF.
8128 CurInfo.Types.push_back(
8129 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8130 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8131 } else {
8132 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8133 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8134 L.IE->getExprLoc());
8135 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8136 // placeholder value MEMBER_OF=FFFF so that the entry is later
8137 // updated with the correct value of MEMBER_OF.
8138 CurInfo.Types.push_back(
8139 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8140 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8141 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8142 }
8143 CurInfo.Exprs.push_back(L.VD);
8144 CurInfo.BasePointers.emplace_back(BasePtr);
8145 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8146 CurInfo.DevicePointers.emplace_back(
8147 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8148 CurInfo.Pointers.push_back(Ptr);
8149 CurInfo.Sizes.push_back(
8150 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8151 CurInfo.Mappers.push_back(nullptr);
8152 }
8153 }
8154
8155 // Unify entries in one list making sure the struct mapping precedes the
8156 // individual fields:
8157 MapCombinedInfoTy UnionCurInfo;
8158 UnionCurInfo.append(StructBaseCurInfo);
8159 UnionCurInfo.append(CurInfo);
8160
8161 // If there is an entry in PartialStruct it means we have a struct with
8162 // individual members mapped. Emit an extra combined entry.
8163 if (PartialStruct.Base.isValid()) {
8164 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8165 // Emit a combined entry:
8166 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8167 /*IsMapThis*/ !VD, OMPBuilder, VD);
8168 }
8169
8170 // We need to append the results of this capture to what we already have.
8171 CombinedInfo.append(UnionCurInfo);
8172 }
8173 // Append data for use_device_ptr clauses.
8174 CombinedInfo.append(UseDeviceDataCombinedInfo);
8175 }
8176
8177public:
8178 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8179 : CurDir(&Dir), CGF(CGF) {
8180 // Extract firstprivate clause information.
8181 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8182 for (const auto *D : C->varlist())
8183 FirstPrivateDecls.try_emplace(
8184 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8185 // Extract implicit firstprivates from uses_allocators clauses.
8186 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8187 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8188 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8189 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8190 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8191 /*Implicit=*/true);
8192 else if (const auto *VD = dyn_cast<VarDecl>(
8193 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8194 ->getDecl()))
8195 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8196 }
8197 }
8198 // Extract device pointer clause information.
8199 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8200 for (auto L : C->component_lists())
8201 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8202 // Extract device addr clause information.
8203 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8204 for (auto L : C->component_lists())
8205 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8206 // Extract map information.
8207 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8208 if (C->getMapType() != OMPC_MAP_to)
8209 continue;
8210 for (auto L : C->component_lists()) {
8211 const ValueDecl *VD = std::get<0>(L);
8212 const auto *RD = VD ? VD->getType()
8213 .getCanonicalType()
8214 .getNonReferenceType()
8215 ->getAsCXXRecordDecl()
8216 : nullptr;
8217 if (RD && RD->isLambda())
8218 LambdasMap.try_emplace(std::get<0>(L), C);
8219 }
8220 }
8221 }
8222
8223 /// Constructor for the declare mapper directive.
8224 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8225 : CurDir(&Dir), CGF(CGF) {}
8226
8227 /// Generate code for the combined entry if we have a partially mapped struct
8228 /// and take care of the mapping flags of the arguments corresponding to
8229 /// individual struct members.
8230 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8231 MapFlagsArrayTy &CurTypes,
8232 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8233 llvm::OpenMPIRBuilder &OMPBuilder,
8234 const ValueDecl *VD = nullptr,
8235 bool NotTargetParams = true) const {
8236 if (CurTypes.size() == 1 &&
8237 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8238 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8239 !PartialStruct.IsArraySection)
8240 return;
8241 Address LBAddr = PartialStruct.LowestElem.second;
8242 Address HBAddr = PartialStruct.HighestElem.second;
8243 if (PartialStruct.HasCompleteRecord) {
8244 LBAddr = PartialStruct.LB;
8245 HBAddr = PartialStruct.LB;
8246 }
8247 CombinedInfo.Exprs.push_back(VD);
8248 // Base is the base of the struct
8249 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8250 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8251 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8252 // Pointer is the address of the lowest element
8253 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8254 const CXXMethodDecl *MD =
8255 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8256 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8257 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8258 // There should not be a mapper for a combined entry.
8259 if (HasBaseClass) {
8260 // OpenMP 5.2 148:21:
8261 // If the target construct is within a class non-static member function,
8262 // and a variable is an accessible data member of the object for which the
8263 // non-static data member function is invoked, the variable is treated as
8264 // if the this[:1] expression had appeared in a map clause with a map-type
8265 // of tofrom.
8266 // Emit this[:1]
8267 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8268 QualType Ty = MD->getFunctionObjectParameterType();
8269 llvm::Value *Size =
8270 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8271 /*isSigned=*/true);
8272 CombinedInfo.Sizes.push_back(Size);
8273 } else {
8274 CombinedInfo.Pointers.push_back(LB);
8275 // Size is (addr of {highest+1} element) - (addr of lowest element)
8276 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8277 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8278 HBAddr.getElementType(), HB, /*Idx0=*/1);
8279 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8280 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8281 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8282 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8283 /*isSigned=*/false);
8284 CombinedInfo.Sizes.push_back(Size);
8285 }
8286 CombinedInfo.Mappers.push_back(nullptr);
8287 // The map type is always TARGET_PARAM when generating info for captures.
8288 CombinedInfo.Types.push_back(
8289 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8290 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8291 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8292 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8293 // If any element has the present modifier, then make sure the runtime
8294 // doesn't attempt to allocate the struct.
8295 if (CurTypes.end() !=
8296 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8297 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8298 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8299 }))
8300 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8301 // Remove TARGET_PARAM flag from the first element
8302 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8303 // If any element has the ompx_hold modifier, then make sure the runtime
8304 // uses the hold reference count for the struct as a whole so that it won't
8305 // be unmapped by an extra dynamic reference count decrement. Add it to all
8306 // elements as well so the runtime knows which reference count to check
8307 // when determining whether it's time for device-to-host transfers of
8308 // individual elements.
8309 if (CurTypes.end() !=
8310 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8311 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8312 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8313 })) {
8314 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8315 for (auto &M : CurTypes)
8316 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8317 }
8318
8319 // All other current entries will be MEMBER_OF the combined entry
8320 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8321 // 0xFFFF in the MEMBER_OF field).
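// For example (sketch of the encoding implemented in OMPIRBuilder): if the
// combined entry is emitted at index 2, getMemberOfFlag(2) yields
// MEMBER_OF(3) in the high 16 bits of the flags, and setCorrectMemberOfFlag
// replaces the 0xFFFF placeholder of each member entry with that value.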
8322 OpenMPOffloadMappingFlags MemberOfFlag =
8323 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8324 for (auto &M : CurTypes)
8325 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8326 }
8327
8328 /// Generate all the base pointers, section pointers, sizes, map types, and
8329 /// mappers for the extracted mappable expressions (all included in \a
8330 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8331 /// pair of the relevant declaration and index where it occurs is appended to
8332 /// the device pointers info array.
8333 void generateAllInfo(
8334 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8335 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8336 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8337 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8338 "Expect a executable directive");
8339 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8340 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8341 SkipVarSet);
8342 }
8343
8344 /// Generate all the base pointers, section pointers, sizes, map types, and
8345 /// mappers for the extracted map clauses of user-defined mapper (all included
8346 /// in \a CombinedInfo).
8347 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8348 llvm::OpenMPIRBuilder &OMPBuilder) const {
8349 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8350 "Expect a declare mapper directive");
8351 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8352 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8353 OMPBuilder);
8354 }
8355
8356 /// Emit capture info for lambdas for variables captured by reference.
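/// Illustrative sketch: for a lambda [&x]() { ... } used in a target
/// region, this emits a PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT entry
/// for the 'this' capture (if any) and one per by-reference capture,
/// recording the address of the corresponding field of the lambda object.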
8357 void generateInfoForLambdaCaptures(
8358 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8359 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8360 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8361 const auto *RD = VDType->getAsCXXRecordDecl();
8362 if (!RD || !RD->isLambda())
8363 return;
8364 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8365 CGF.getContext().getDeclAlign(VD));
8366 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8367 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8368 FieldDecl *ThisCapture = nullptr;
8369 RD->getCaptureFields(Captures, ThisCapture);
8370 if (ThisCapture) {
8371 LValue ThisLVal =
8372 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8373 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8374 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8375 VDLVal.getPointer(CGF));
8376 CombinedInfo.Exprs.push_back(VD);
8377 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8378 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8379 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8380 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8381 CombinedInfo.Sizes.push_back(
8382 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8383 CGF.Int64Ty, /*isSigned=*/true));
8384 CombinedInfo.Types.push_back(
8385 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8386 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8387 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8388 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8389 CombinedInfo.Mappers.push_back(nullptr);
8390 }
8391 for (const LambdaCapture &LC : RD->captures()) {
8392 if (!LC.capturesVariable())
8393 continue;
8394 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8395 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8396 continue;
8397 auto It = Captures.find(VD);
8398 assert(It != Captures.end() && "Found lambda capture without field.");
8399 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8400 if (LC.getCaptureKind() == LCK_ByRef) {
8401 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8402 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8403 VDLVal.getPointer(CGF));
8404 CombinedInfo.Exprs.push_back(VD);
8405 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8406 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8407 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8408 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8409 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8410 CGF.getTypeSize(
8411 VD->getType().getCanonicalType().getNonReferenceType()),
8412 CGF.Int64Ty, /*isSigned=*/true));
8413 } else {
8414 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8415 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8416 VDLVal.getPointer(CGF));
8417 CombinedInfo.Exprs.push_back(VD);
8418 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8419 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8420 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8421 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8422 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8423 }
8424 CombinedInfo.Types.push_back(
8425 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8426 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8427 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8428 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8429 CombinedInfo.Mappers.push_back(nullptr);
8430 }
8431 }
8432
8433 /// Set correct indices for lambdas captures.
8434 void adjustMemberOfForLambdaCaptures(
8435 llvm::OpenMPIRBuilder &OMPBuilder,
8436 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8437 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8438 MapFlagsArrayTy &Types) const {
8439 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8440 // Set correct member_of idx for all implicit lambda captures.
8441 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8442 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8443 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8444 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8445 continue;
8446 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8447 assert(BasePtr && "Unable to find base lambda address.");
8448 int TgtIdx = -1;
8449 for (unsigned J = I; J > 0; --J) {
8450 unsigned Idx = J - 1;
8451 if (Pointers[Idx] != BasePtr)
8452 continue;
8453 TgtIdx = Idx;
8454 break;
8455 }
8456 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8457 // All other current entries will be MEMBER_OF the combined entry
8458 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8459 // 0xFFFF in the MEMBER_OF field).
8460 OpenMPOffloadMappingFlags MemberOfFlag =
8461 OMPBuilder.getMemberOfFlag(TgtIdx);
8462 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8463 }
8464 }
8465
8466 /// Generate the base pointers, section pointers, sizes, map types, and
8467 /// mappers associated to a given capture (all included in \a CombinedInfo).
8468 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8469 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8470 StructRangeInfoTy &PartialStruct) const {
8471 assert(!Cap->capturesVariableArrayType() &&
8472 "Not expecting to generate map info for a variable array type!");
8473
8474 // We need to know when we are generating information for the first component.
8475 const ValueDecl *VD = Cap->capturesThis()
8476 ? nullptr
8477 : Cap->getCapturedVar()->getCanonicalDecl();
8478
8479 // For map(to: lambda): skip it here; it is processed in
8480 // generateDefaultMapInfo.
8481 if (LambdasMap.count(VD))
8482 return;
8483
8484 // If this declaration appears in an is_device_ptr clause, we just have to
8485 // pass the pointer by value. If it is a reference to a declaration, we just
8486 // pass its value.
8487 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8488 CombinedInfo.Exprs.push_back(VD);
8489 CombinedInfo.BasePointers.emplace_back(Arg);
8490 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8491 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8492 CombinedInfo.Pointers.push_back(Arg);
8493 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8494 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8495 /*isSigned=*/true));
8496 CombinedInfo.Types.push_back(
8497 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8498 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8499 CombinedInfo.Mappers.push_back(nullptr);
8500 return;
8501 }
8502
8503 using MapData =
8504 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8505 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8506 const ValueDecl *, const Expr *>;
8507 SmallVector<MapData, 4> DeclComponentLists;
8508 // For member fields listed in is_device_ptr, store them in
8509 // DeclComponentLists for generating component info.
8510 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8511 auto It = DevPointersMap.find(VD);
8512 if (It != DevPointersMap.end())
8513 for (const auto &MCL : It->second)
8514 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8515 /*IsImplicit=*/true, nullptr,
8516 nullptr);
8517 auto I = HasDevAddrsMap.find(VD);
8518 if (I != HasDevAddrsMap.end())
8519 for (const auto &MCL : I->second)
8520 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8521 /*IsImplicit=*/true, nullptr,
8522 nullptr);
8523 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8524 "Expect a executable directive");
8525 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8526 bool HasMapBasePtr = false;
8527 bool HasMapArraySec = false;
8528 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8529 const auto *EI = C->getVarRefs().begin();
8530 for (const auto L : C->decl_component_lists(VD)) {
8531 const ValueDecl *VDecl, *Mapper;
8532 // The Expression is not correct if the mapping is implicit
8533 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8534 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8535 std::tie(VDecl, Components, Mapper) = L;
8536 assert(VDecl == VD && "We got information for the wrong declaration??");
8537 assert(!Components.empty() &&
8538 "Not expecting declaration with no component lists.");
8539 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8540 HasMapBasePtr = true;
8541 if (VD && E && VD->getType()->isAnyPointerType() &&
8542 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8543 HasMapArraySec = true;
8544 DeclComponentLists.emplace_back(Components, C->getMapType(),
8545 C->getMapTypeModifiers(),
8546 C->isImplicit(), Mapper, E);
8547 ++EI;
8548 }
8549 }
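// Descriptive summary of the comparator below: component lists carrying
// the 'present' modifier are moved to the front and plain 'alloc' entries
// sink to the back; the stable sort preserves the source order of the rest.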
8550 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8551 const MapData &RHS) {
8552 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8553 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8554 bool HasPresent =
8555 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8556 bool HasAllocs = MapType == OMPC_MAP_alloc;
8557 MapModifiers = std::get<2>(RHS);
8558 MapType = std::get<1>(LHS);
8559 bool HasPresentR =
8560 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8561 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8562 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8563 });
8564
8565 // Find overlapping elements (including the offset from the base element).
8566 llvm::SmallDenseMap<
8567 const MapData *,
8568 llvm::SmallVector<
8569 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8570 4>
8571 OverlappedData;
8572 size_t Count = 0;
8573 for (const MapData &L : DeclComponentLists) {
8574 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8575 OpenMPMapClauseKind MapType;
8576 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8577 bool IsImplicit;
8578 const ValueDecl *Mapper;
8579 const Expr *VarRef;
8580 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8581 L;
8582 ++Count;
8583 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8584 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8585 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8586 VarRef) = L1;
8587 auto CI = Components.rbegin();
8588 auto CE = Components.rend();
8589 auto SI = Components1.rbegin();
8590 auto SE = Components1.rend();
8591 for (; CI != CE && SI != SE; ++CI, ++SI) {
8592 if (CI->getAssociatedExpression()->getStmtClass() !=
8593 SI->getAssociatedExpression()->getStmtClass())
8594 break;
8595 // Are we dealing with different variables/fields?
8596 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8597 break;
8598 }
8599 // We found an overlap if, for at least one of the lists, we reached the
8600 // head of the components list.
8601 if (CI == CE || SI == SE) {
8602 // Ignore it if it is the same component.
8603 if (CI == CE && SI == SE)
8604 continue;
8605 const auto It = (SI == SE) ? CI : SI;
8606 // If one component is a pointer and the other one is a kind of
8607 // dereference of this pointer (array subscript, section, dereference,
8608 // etc.), it is not an overlap.
8609 // The same holds if one component is a base and the other component is
8610 // a dereferenced pointer MemberExpr with the same base.
8611 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8612 (std::prev(It)->getAssociatedDeclaration() &&
8613 std::prev(It)
8614 ->getAssociatedDeclaration()
8615 ->getType()
8616 ->isPointerType()) ||
8617 (It->getAssociatedDeclaration() &&
8618 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8619 std::next(It) != CE && std::next(It) != SE))
8620 continue;
8621 const MapData &BaseData = CI == CE ? L : L1;
8622 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8623 SI == SE ? Components : Components1;
8624 OverlappedData[&BaseData].push_back(SubData);
8625 }
8626 }
8627 }
8628 // Sort the overlapped elements for each item.
8629 llvm::SmallVector<const FieldDecl *, 4> Layout;
8630 if (!OverlappedData.empty()) {
8631 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8632 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8633 while (BaseType != OrigType) {
8634 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8635 OrigType = BaseType->getPointeeOrArrayElementType();
8636 }
8637
8638 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8639 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8640 else {
8641 const auto *RD = BaseType->getAsRecordDecl();
8642 Layout.append(RD->field_begin(), RD->field_end());
8643 }
8644 }
8645 for (auto &Pair : OverlappedData) {
8646 llvm::stable_sort(
8647 Pair.getSecond(),
8648 [&Layout](
8649 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8650 OMPClauseMappableExprCommon::MappableExprComponentListRef
8651 Second) {
8652 auto CI = First.rbegin();
8653 auto CE = First.rend();
8654 auto SI = Second.rbegin();
8655 auto SE = Second.rend();
8656 for (; CI != CE && SI != SE; ++CI, ++SI) {
8657 if (CI->getAssociatedExpression()->getStmtClass() !=
8658 SI->getAssociatedExpression()->getStmtClass())
8659 break;
8660 // Are we dealing with different variables/fields?
8661 if (CI->getAssociatedDeclaration() !=
8662 SI->getAssociatedDeclaration())
8663 break;
8664 }
8665
8666 // Lists contain the same elements.
8667 if (CI == CE && SI == SE)
8668 return false;
8669
8670 // A list with fewer elements is less than a list with more elements.
8671 if (CI == CE || SI == SE)
8672 return CI == CE;
8673
8674 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8675 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8676 if (FD1->getParent() == FD2->getParent())
8677 return FD1->getFieldIndex() < FD2->getFieldIndex();
8678 const auto *It =
8679 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8680 return FD == FD1 || FD == FD2;
8681 });
8682 return *It == FD1;
8683 });
8684 }
8685
8686 // Associated with a capture, because the mapping flags depend on it.
8687 // Go through all of the elements that have overlapped elements.
8688 bool IsFirstComponentList = true;
8689 MapCombinedInfoTy StructBaseCombinedInfo;
8690 for (const auto &Pair : OverlappedData) {
8691 const MapData &L = *Pair.getFirst();
8692 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8693 OpenMPMapClauseKind MapType;
8694 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8695 bool IsImplicit;
8696 const ValueDecl *Mapper;
8697 const Expr *VarRef;
8698 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8699 L;
8700 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8701 OverlappedComponents = Pair.getSecond();
8702 generateInfoForComponentList(
8703 MapType, MapModifiers, {}, Components, CombinedInfo,
8704 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8705 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8706 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8707 IsFirstComponentList = false;
8708 }
8709 // Go through other elements without overlapped elements.
8710 for (const MapData &L : DeclComponentLists) {
8711 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8712 OpenMPMapClauseKind MapType;
8713 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8714 bool IsImplicit;
8715 const ValueDecl *Mapper;
8716 const Expr *VarRef;
8717 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8718 L;
8719 auto It = OverlappedData.find(&L);
8720 if (It == OverlappedData.end())
8721 generateInfoForComponentList(
8722 MapType, MapModifiers, {}, Components, CombinedInfo,
8723 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8724 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8725 /*ForDeviceAddr=*/false, VD, VarRef,
8726 /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
8727 IsFirstComponentList = false;
8728 }
8729 }
8730
8731 /// Generate the default map information for a given capture \a CI,
8732 /// record field declaration \a RI and captured value \a CV.
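/// Illustrative sketch: a scalar int x captured by copy becomes a LITERAL
/// target parameter sized to int, while a variable captured by reference
/// gets an implicit to/tofrom map (see getMapModifiersForPrivateClauses),
/// always with TARGET_PARAM and, for implicit captures, IMPLICIT added.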
8733 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8734 const FieldDecl &RI, llvm::Value *CV,
8735 MapCombinedInfoTy &CombinedInfo) const {
8736 bool IsImplicit = true;
8737 // Do the default mapping.
8738 if (CI.capturesThis()) {
8739 CombinedInfo.Exprs.push_back(nullptr);
8740 CombinedInfo.BasePointers.push_back(CV);
8741 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8742 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8743 CombinedInfo.Pointers.push_back(CV);
8744 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8745 CombinedInfo.Sizes.push_back(
8746 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8747 CGF.Int64Ty, /*isSigned=*/true));
8748 // Default map type.
8749 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8750 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8751 } else if (CI.capturesVariableByCopy()) {
8752 const VarDecl *VD = CI.getCapturedVar();
8753 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8754 CombinedInfo.BasePointers.push_back(CV);
8755 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8756 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8757 CombinedInfo.Pointers.push_back(CV);
8758 if (!RI.getType()->isAnyPointerType()) {
8759 // We have to signal to the runtime those captures passed by value that
8760 // are not pointers.
8761 CombinedInfo.Types.push_back(
8762 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8763 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8764 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8765 } else {
8766 // Pointers are implicitly mapped with a zero size and no flags
8767 // (other than first map that is added for all implicit maps).
8768 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8769 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8770 }
8771 auto I = FirstPrivateDecls.find(VD);
8772 if (I != FirstPrivateDecls.end())
8773 IsImplicit = I->getSecond();
8774 } else {
8775 assert(CI.capturesVariable() && "Expected captured reference.");
8776 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8777 QualType ElementType = PtrTy->getPointeeType();
8778 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8779 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8780 // The default map type for a scalar/complex type is 'to' because by
8781 // default the value doesn't have to be retrieved. For an aggregate
8782 // type, the default is 'tofrom'.
8783 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8784 const VarDecl *VD = CI.getCapturedVar();
8785 auto I = FirstPrivateDecls.find(VD);
8786 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8787 CombinedInfo.BasePointers.push_back(CV);
8788 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8789 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8790 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8791 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8792 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8793 AlignmentSource::Decl));
8794 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8795 } else {
8796 CombinedInfo.Pointers.push_back(CV);
8797 }
8798 if (I != FirstPrivateDecls.end())
8799 IsImplicit = I->getSecond();
8800 }
8801 // Every default map produces a single argument which is a target parameter.
8802 CombinedInfo.Types.back() |=
8803 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8804
8805 // Add flag stating this is an implicit map.
8806 if (IsImplicit)
8807 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8808
8809 // No user-defined mapper for default mapping.
8810 CombinedInfo.Mappers.push_back(nullptr);
8811 }
8812};
8813} // anonymous namespace
8814
8815// Try to extract the base declaration from a `this->x` expression if possible.
8816 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8817 if (!E)
8818 return nullptr;
8819
8820 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8821 if (const MemberExpr *ME =
8822 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8823 return ME->getMemberDecl();
8824 return nullptr;
8825}
8826
8827/// Emit a string constant containing the names of the values mapped to the
8828/// offloading runtime library.
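/// Sketch of the output (assuming OMPIRBuilder's ';'-separated source
/// location encoding): mapping 'arr[0:2]' at file.c:3:10 produces a
/// constant string like ";file.c;arr[0:2];3;10;;".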
8829static llvm::Constant *
8830emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8831 MappableExprsHandler::MappingExprInfo &MapExprs) {
8832
8833 uint32_t SrcLocStrSize;
8834 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8835 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8836
8837 SourceLocation Loc;
8838 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8839 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8840 Loc = VD->getLocation();
8841 else
8842 Loc = MapExprs.getMapExpr()->getExprLoc();
8843 } else {
8844 Loc = MapExprs.getMapDecl()->getLocation();
8845 }
8846
8847 std::string ExprName;
8848 if (MapExprs.getMapExpr()) {
8849 PrintingPolicy P(CGF.getContext().getLangOpts());
8850 llvm::raw_string_ostream OS(ExprName);
8851 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8852 } else {
8853 ExprName = MapExprs.getMapDecl()->getNameAsString();
8854 }
8855
8856 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8857 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8858 PLoc.getLine(), PLoc.getColumn(),
8859 SrcLocStrSize);
8860}
8861/// Emit the arrays used to pass the captures and map information to the
8862/// offloading runtime library. If there is no map or capture information,
8863/// return nullptr by reference.
8864static void emitOffloadingArraysAndArgs(
8865 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8866 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8867 bool IsNonContiguous = false, bool ForEndCall = false) {
8868 CodeGenModule &CGM = CGF.CGM;
8869
8870 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8871 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8872 CGF.AllocaInsertPt->getIterator());
8873 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8874 CGF.Builder.GetInsertPoint());
8875
8876 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8877 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8878 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8879 }
8880 };
8881
8882 auto CustomMapperCB = [&](unsigned int I) {
8883 llvm::Value *MFunc = nullptr;
8884 if (CombinedInfo.Mappers[I]) {
8885 Info.HasMapper = true;
8886 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8887 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8888 }
8889 return MFunc;
8890 };
8891 OMPBuilder.emitOffloadingArraysAndArgs(
8892 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
8893 ForEndCall, DeviceAddrCB, CustomMapperCB);
8894}
8895
8896/// Check for inner distribute directive.
8897static const OMPExecutableDirective *
8898getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8899 const auto *CS = D.getInnermostCapturedStmt();
8900 const auto *Body =
8901 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8902 const Stmt *ChildStmt =
8903 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8904
8905 if (const auto *NestedDir =
8906 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8907 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8908 switch (D.getDirectiveKind()) {
8909 case OMPD_target:
8910 // For now, treat 'target' with nested 'teams loop' as if it's
8911 // distributed (target teams distribute).
8912 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8913 return NestedDir;
8914 if (DKind == OMPD_teams) {
8915 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8916 /*IgnoreCaptured=*/true);
8917 if (!Body)
8918 return nullptr;
8919 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8920 if (const auto *NND =
8921 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8922 DKind = NND->getDirectiveKind();
8923 if (isOpenMPDistributeDirective(DKind))
8924 return NND;
8925 }
8926 }
8927 return nullptr;
8928 case OMPD_target_teams:
8929 if (isOpenMPDistributeDirective(DKind))
8930 return NestedDir;
8931 return nullptr;
8932 case OMPD_target_parallel:
8933 case OMPD_target_simd:
8934 case OMPD_target_parallel_for:
8935 case OMPD_target_parallel_for_simd:
8936 return nullptr;
8937 case OMPD_target_teams_distribute:
8938 case OMPD_target_teams_distribute_simd:
8939 case OMPD_target_teams_distribute_parallel_for:
8940 case OMPD_target_teams_distribute_parallel_for_simd:
8941 case OMPD_parallel:
8942 case OMPD_for:
8943 case OMPD_parallel_for:
8944 case OMPD_parallel_master:
8945 case OMPD_parallel_sections:
8946 case OMPD_for_simd:
8947 case OMPD_parallel_for_simd:
8948 case OMPD_cancel:
8949 case OMPD_cancellation_point:
8950 case OMPD_ordered:
8951 case OMPD_threadprivate:
8952 case OMPD_allocate:
8953 case OMPD_task:
8954 case OMPD_simd:
8955 case OMPD_tile:
8956 case OMPD_unroll:
8957 case OMPD_sections:
8958 case OMPD_section:
8959 case OMPD_single:
8960 case OMPD_master:
8961 case OMPD_critical:
8962 case OMPD_taskyield:
8963 case OMPD_barrier:
8964 case OMPD_taskwait:
8965 case OMPD_taskgroup:
8966 case OMPD_atomic:
8967 case OMPD_flush:
8968 case OMPD_depobj:
8969 case OMPD_scan:
8970 case OMPD_teams:
8971 case OMPD_target_data:
8972 case OMPD_target_exit_data:
8973 case OMPD_target_enter_data:
8974 case OMPD_distribute:
8975 case OMPD_distribute_simd:
8976 case OMPD_distribute_parallel_for:
8977 case OMPD_distribute_parallel_for_simd:
8978 case OMPD_teams_distribute:
8979 case OMPD_teams_distribute_simd:
8980 case OMPD_teams_distribute_parallel_for:
8981 case OMPD_teams_distribute_parallel_for_simd:
8982 case OMPD_target_update:
8983 case OMPD_declare_simd:
8984 case OMPD_declare_variant:
8985 case OMPD_begin_declare_variant:
8986 case OMPD_end_declare_variant:
8987 case OMPD_declare_target:
8988 case OMPD_end_declare_target:
8989 case OMPD_declare_reduction:
8990 case OMPD_declare_mapper:
8991 case OMPD_taskloop:
8992 case OMPD_taskloop_simd:
8993 case OMPD_master_taskloop:
8994 case OMPD_master_taskloop_simd:
8995 case OMPD_parallel_master_taskloop:
8996 case OMPD_parallel_master_taskloop_simd:
8997 case OMPD_requires:
8998 case OMPD_metadirective:
8999 case OMPD_unknown:
9000 default:
9001 llvm_unreachable("Unexpected directive.");
9002 }
9003 }
9004
9005 return nullptr;
9006}
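// Illustrative example (not part of the original source): for
//   #pragma omp target
//   #pragma omp teams distribute
// the function above returns the nested 'teams distribute' directive, whereas
// for 'target parallel for' (which has no distribute nest) it returns nullptr.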
9007
9008/// Emit the user-defined mapper function. The code generation follows the
9009/// pattern in the example below.
9010/// \code
9011/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9012/// void *base, void *begin,
9013/// int64_t size, int64_t type,
9014/// void *name = nullptr) {
9015/// // Allocate space for an array section first or add a base/begin for
9016/// // pointer dereference.
9017/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9018/// !maptype.IsDelete)
9019/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9020/// size*sizeof(Ty), clearToFromMember(type));
9021/// // Map members.
9022/// for (unsigned i = 0; i < size; i++) {
9023/// // For each component specified by this mapper:
9024/// for (auto c : begin[i]->all_components) {
9025/// if (c.hasMapper())
9026/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9027/// c.arg_type, c.arg_name);
9028/// else
9029/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9030/// c.arg_begin, c.arg_size, c.arg_type,
9031/// c.arg_name);
9032/// }
9033/// }
9034/// // Delete the array section.
9035/// if (size > 1 && maptype.IsDelete)
9036/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9037/// size*sizeof(Ty), clearToFromMember(type));
9038/// }
9039/// \endcode
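// Illustrative source-level input (not part of the original source; names
// hypothetical) that the routine below lowers:
//   struct Vec { int len; double *data; };
//   #pragma omp declare mapper(id : Vec v) map(v.len, v.data[0:v.len])
// For this declaration a mapper function named
// ".omp_mapper.<mangled Vec>.id" is emitted, following the pattern above.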
9040void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9041 CodeGenFunction *CGF) {
9042 if (UDMMap.count(D) > 0)
9043 return;
9044 ASTContext &C = CGM.getContext();
9045 QualType Ty = D->getType();
9046 auto *MapperVarDecl =
9047 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9048 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9049 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9050
9051 CodeGenFunction MapperCGF(CGM);
9052 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9053 auto PrivatizeAndGenMapInfoCB =
9054 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9055 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9056 MapperCGF.Builder.restoreIP(CodeGenIP);
9057
9058 // Privatize the declared variable of mapper to be the current array
9059 // element.
9060 Address PtrCurrent(
9061 PtrPHI, ElemTy,
9062 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9063 .getAlignment()
9064 .alignmentOfArrayElement(ElementSize));
9065 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9066 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9067 (void)Scope.Privatize();
9068
9069 // Get map clause information.
9070 MappableExprsHandler MEHandler(*D, MapperCGF);
9071 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9072
9073 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9074 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9075 };
9076 if (CGM.getCodeGenOpts().getDebugInfo() !=
9077 llvm::codegenoptions::NoDebugInfo) {
9078 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9079 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9080 FillInfoMap);
9081 }
9082
9083 return CombinedInfo;
9084 };
9085
9086 auto CustomMapperCB = [&](unsigned I, llvm::Function **MapperFunc) {
9087 if (CombinedInfo.Mappers[I]) {
9088 // Call the corresponding mapper function.
9089 *MapperFunc = getOrCreateUserDefinedMapperFunc(
9090 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9091 assert(*MapperFunc && "Expect a valid mapper function is available.");
9092 return true;
9093 }
9094 return false;
9095 };
9096
9097 SmallString<64> TyStr;
9098 llvm::raw_svector_ostream Out(TyStr);
9099 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9100 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9101
9102 auto *NewFn = OMPBuilder.emitUserDefinedMapper(PrivatizeAndGenMapInfoCB,
9103 ElemTy, Name, CustomMapperCB);
9104 UDMMap.try_emplace(D, NewFn);
9105 if (CGF)
9106 FunctionUDMMap[CGF->CurFn].push_back(D);
9107}
9108
9109llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9110 const OMPDeclareMapperDecl *D) {
9111 auto I = UDMMap.find(D);
9112 if (I != UDMMap.end())
9113 return I->second;
9114 emitUserDefinedMapper(D);
9115 return UDMMap.lookup(D);
9116}
9117
9118llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9119 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9120 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9121 const OMPLoopDirective &D)>
9122 SizeEmitter) {
9123 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9124 const OMPExecutableDirective *TD = &D;
9125 // Get nested teams distribute kind directive, if any. For now, treat
9126 // 'target_teams_loop' as if it's really a target_teams_distribute.
9127 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9128 Kind != OMPD_target_teams_loop)
9129 TD = getNestedDistributeDirective(CGM.getContext(), D);
9130 if (!TD)
9131 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9132
9133 const auto *LD = cast<OMPLoopDirective>(TD);
9134 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9135 return NumIterations;
9136 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9137}
9138
9139static void
9140emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9141 const OMPExecutableDirective &D,
9142 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9143 bool RequiresOuterTask, const CapturedStmt &CS,
9144 bool OffloadingMandatory, CodeGenFunction &CGF) {
9145 if (OffloadingMandatory) {
9146 CGF.Builder.CreateUnreachable();
9147 } else {
9148 if (RequiresOuterTask) {
9149 CapturedVars.clear();
9150 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9151 }
9152 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9153 CapturedVars);
9154 }
9155}
9156
9157static llvm::Value *emitDeviceID(
9158 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9159 CodeGenFunction &CGF) {
9160 // Emit device ID if any.
9161 llvm::Value *DeviceID;
9162 if (Device.getPointer()) {
9163 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9164 Device.getInt() == OMPC_DEVICE_device_num) &&
9165 "Expected device_num modifier.");
9166 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9167 DeviceID =
9168 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9169 } else {
9170 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9171 }
9172 return DeviceID;
9173}
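// Example (illustrative): '#pragma omp target device(2)' yields a
// sign-extended i64 value of 2, while a directive without a device clause
// falls back to OMP_DEVICEID_UNDEF.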
9174
9175static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9176 CodeGenFunction &CGF) {
9177 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9178
9179 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9180 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9181 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9182 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9183 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9184 /*isSigned=*/false);
9185 }
9186 return DynCGroupMem;
9187}
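// Example (illustrative): '#pragma omp target ompx_dyn_cgroup_mem(1024)'
// yields an i32 1024; without the clause the dynamic group memory size is 0.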
9188static void genMapInfoForCaptures(
9189 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9190 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9191 llvm::OpenMPIRBuilder &OMPBuilder,
9192 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9193 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9194
9195 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9196 auto RI = CS.getCapturedRecordDecl()->field_begin();
9197 auto *CV = CapturedVars.begin();
9198 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9199 CE = CS.capture_end();
9200 CI != CE; ++CI, ++RI, ++CV) {
9201 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9202 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9203
9204 // VLA sizes are passed to the outlined region by copy and do not have map
9205 // information associated.
9206 if (CI->capturesVariableArrayType()) {
9207 CurInfo.Exprs.push_back(nullptr);
9208 CurInfo.BasePointers.push_back(*CV);
9209 CurInfo.DevicePtrDecls.push_back(nullptr);
9210 CurInfo.DevicePointers.push_back(
9211 MappableExprsHandler::DeviceInfoTy::None);
9212 CurInfo.Pointers.push_back(*CV);
9213 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9214 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9215 // Copy to the device as an argument. No need to retrieve it.
9216 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9217 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9218 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9219 CurInfo.Mappers.push_back(nullptr);
9220 } else {
9221 // If we have any information in the map clause, we use it, otherwise we
9222 // just do a default mapping.
9223 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9224 if (!CI->capturesThis())
9225 MappedVarSet.insert(CI->getCapturedVar());
9226 else
9227 MappedVarSet.insert(nullptr);
9228 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9229 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9230 // Generate correct mapping for variables captured by reference in
9231 // lambdas.
9232 if (CI->capturesVariable())
9233 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9234 CurInfo, LambdaPointers);
9235 }
9236 // We expect to have at least an element of information for this capture.
9237 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9238 "Non-existing map pointer for capture!");
9239 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9240 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9241 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9242 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9243 "Inconsistent map information sizes!");
9244
9245 // If there is an entry in PartialStruct it means we have a struct with
9246 // individual members mapped. Emit an extra combined entry.
9247 if (PartialStruct.Base.isValid()) {
9248 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9249 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
9250 CI->capturesThis(), OMPBuilder, nullptr,
9251 /*NotTargetParams*/ false);
9252 }
9253
9254 // We need to append the results of this capture to what we already have.
9255 CombinedInfo.append(CurInfo);
9256 }
9257 // Adjust MEMBER_OF flags for the lambdas captures.
9258 MEHandler.adjustMemberOfForLambdaCaptures(
9259 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9260 CombinedInfo.Pointers, CombinedInfo.Types);
9261}
9262static void
9263genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9264 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9265 llvm::OpenMPIRBuilder &OMPBuilder,
9266 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9267 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9268
9269 CodeGenModule &CGM = CGF.CGM;
9270 // Map any list items in a map clause that were not captured because they
9271 // weren't referenced within the construct.
9272 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9273
9274 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9275 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9276 };
9277 if (CGM.getCodeGenOpts().getDebugInfo() !=
9278 llvm::codegenoptions::NoDebugInfo) {
9279 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9280 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9281 FillInfoMap);
9282 }
9283}
9284
9285static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9286 const CapturedStmt &CS,
9287 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9288 llvm::OpenMPIRBuilder &OMPBuilder,
9289 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9290 // Get mappable expression information.
9291 MappableExprsHandler MEHandler(D, CGF);
9292 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9293
9294 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9295 MappedVarSet, CombinedInfo);
9296 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9297}
9298
9299template <typename ClauseTy>
9300static void
9301emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9302 const OMPExecutableDirective &D,
9303 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9304 const auto *C = D.getSingleClause<ClauseTy>();
9305 assert(!C->varlist_empty() &&
9306 "ompx_bare requires explicit num_teams and thread_limit");
9307 CodeGenFunction::RunCleanupsScope Scope(CGF);
9308 for (auto *E : C->varlist()) {
9309 llvm::Value *V = CGF.EmitScalarExpr(E);
9310 Values.push_back(
9311 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9312 }
9313}
9314
9315static void emitTargetCallKernelLaunch(
9316 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9317 const OMPExecutableDirective &D,
9318 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9319 const CapturedStmt &CS, bool OffloadingMandatory,
9320 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9321 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9322 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9323 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9324 const OMPLoopDirective &D)>
9325 SizeEmitter,
9326 CodeGenFunction &CGF, CodeGenModule &CGM) {
9327 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9328
9329 // Fill up the arrays with all the captured variables.
9330 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9331 CGOpenMPRuntime::TargetDataInfo Info;
9332 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9333
9334 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9335 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9336
9337 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9338 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9339 CGF.VoidPtrTy, CGM.getPointerAlign());
9340 InputInfo.PointersArray =
9341 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9342 InputInfo.SizesArray =
9343 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9344 InputInfo.MappersArray =
9345 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9346 MapTypesArray = Info.RTArgs.MapTypesArray;
9347 MapNamesArray = Info.RTArgs.MapNamesArray;
9348
9349 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9350 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9351 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9352 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9353 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9354
9355 if (IsReverseOffloading) {
9356 // Reverse offloading is not supported, so just execute on the host.
9357 // FIXME: This fallback solution is incorrect since it ignores the
9358 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9359 // assert here and ensure SEMA emits an error.
9360 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9361 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9362 return;
9363 }
9364
9365 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9366 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9367
9368 llvm::Value *BasePointersArray =
9369 InputInfo.BasePointersArray.emitRawPointer(CGF);
9370 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9371 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9372 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9373
9374 auto &&EmitTargetCallFallbackCB =
9375 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9376 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9377 -> llvm::OpenMPIRBuilder::InsertPointTy {
9378 CGF.Builder.restoreIP(IP);
9379 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9380 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9381 return CGF.Builder.saveIP();
9382 };
9383
9384 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9385 llvm::SmallVector<llvm::Value *, 3> NumTeams;
9386 llvm::SmallVector<llvm::Value *, 3> NumThreads;
9387 if (IsBare) {
9388 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9389 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9390 NumThreads);
9391 } else {
9392 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9393 NumThreads.push_back(
9394 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9395 }
9396
9397 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9398 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9399 llvm::Value *NumIterations =
9400 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9401 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9402 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9403 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9404
9405 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9406 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9407 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9408
9409 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9410 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9411 DynCGGroupMem, HasNoWait);
9412
9413 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
9414 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9415 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9416 RTLoc, AllocaIP));
9417 CGF.Builder.restoreIP(AfterIP);
9418 };
9419
9420 if (RequiresOuterTask)
9421 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9422 else
9423 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9424}
9425
9426static void
9427emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9428 const OMPExecutableDirective &D,
9429 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9430 bool RequiresOuterTask, const CapturedStmt &CS,
9431 bool OffloadingMandatory, CodeGenFunction &CGF) {
9432
9433 // Notify that the host version must be executed.
9434 auto &&ElseGen =
9435 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9436 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9437 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9438 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9439 };
9440
9441 if (RequiresOuterTask) {
9442 CodeGenFunction::OMPTargetDataInfo InputInfo;
9443 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9444 } else {
9445 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9446 }
9447}
9448
9449void CGOpenMPRuntime::emitTargetCall(
9450 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9451 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9452 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9453 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9454 const OMPLoopDirective &D)>
9455 SizeEmitter) {
9456 if (!CGF.HaveInsertPoint())
9457 return;
9458
9459 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9460 CGM.getLangOpts().OpenMPOffloadMandatory;
9461
9462 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9463
9464 const bool RequiresOuterTask =
9465 D.hasClausesOfKind<OMPDependClause>() ||
9466 D.hasClausesOfKind<OMPNowaitClause>() ||
9467 D.hasClausesOfKind<OMPInReductionClause>() ||
9468 (CGM.getLangOpts().OpenMP >= 51 &&
9469 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9470 D.hasClausesOfKind<OMPThreadLimitClause>());
9471 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9472 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9473 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9474 PrePostActionTy &) {
9475 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9476 };
9477 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9478
9479 CodeGenFunction::OMPTargetDataInfo InputInfo;
9480 llvm::Value *MapTypesArray = nullptr;
9481 llvm::Value *MapNamesArray = nullptr;
9482
9483 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9484 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9485 OutlinedFnID, &InputInfo, &MapTypesArray,
9486 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9487 PrePostActionTy &) {
9488 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9489 RequiresOuterTask, CS, OffloadingMandatory,
9490 Device, OutlinedFnID, InputInfo, MapTypesArray,
9491 MapNamesArray, SizeEmitter, CGF, CGM);
9492 };
9493
9494 auto &&TargetElseGen =
9495 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9496 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9497 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9498 CS, OffloadingMandatory, CGF);
9499 };
9500
9501 // If we have a target function ID it means that we need to support
9502 // offloading; otherwise, just execute on the host. We must execute on the
9503 // host regardless of the conditional in the if clause if, e.g., the user
9504 // does not specify target triples.
9505 if (OutlinedFnID) {
9506 if (IfCond) {
9507 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9508 } else {
9509 RegionCodeGenTy ThenRCG(TargetThenGen);
9510 ThenRCG(CGF);
9511 }
9512 } else {
9513 RegionCodeGenTy ElseRCG(TargetElseGen);
9514 ElseRCG(CGF);
9515 }
9516}
9517
9518void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9519 StringRef ParentName) {
9520 if (!S)
9521 return;
9522
9523 // Codegen OMP target directives that offload compute to the device.
9524 bool RequiresDeviceCodegen =
9525 isa<OMPExecutableDirective>(S) &&
9526 isOpenMPTargetExecutionDirective(
9527 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9528
9529 if (RequiresDeviceCodegen) {
9530 const auto &E = *cast<OMPExecutableDirective>(S);
9531
9532 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9533 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9534
9535 // Is this a target region that should not be emitted as an entry point? If
9536 // so just signal we are done with this target region.
9537 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9538 return;
9539
9540 switch (E.getDirectiveKind()) {
9541 case OMPD_target:
9542 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9543 cast<OMPTargetDirective>(E));
9544 break;
9545 case OMPD_target_parallel:
9546 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9547 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9548 break;
9549 case OMPD_target_teams:
9550 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9551 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9552 break;
9553 case OMPD_target_teams_distribute:
9554 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9555 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9556 break;
9557 case OMPD_target_teams_distribute_simd:
9558 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9559 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9560 break;
9561 case OMPD_target_parallel_for:
9562 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9563 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9564 break;
9565 case OMPD_target_parallel_for_simd:
9566 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9567 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9568 break;
9569 case OMPD_target_simd:
9570 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9571 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9572 break;
9573 case OMPD_target_teams_distribute_parallel_for:
9574 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9575 CGM, ParentName,
9576 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9577 break;
9578 case OMPD_target_teams_distribute_parallel_for_simd:
9579 CodeGenFunction::
9580 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9581 CGM, ParentName,
9582 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9583 break;
9584 case OMPD_target_teams_loop:
9585 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9586 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9587 break;
9588 case OMPD_target_parallel_loop:
9589 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9590 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9591 break;
9592 case OMPD_parallel:
9593 case OMPD_for:
9594 case OMPD_parallel_for:
9595 case OMPD_parallel_master:
9596 case OMPD_parallel_sections:
9597 case OMPD_for_simd:
9598 case OMPD_parallel_for_simd:
9599 case OMPD_cancel:
9600 case OMPD_cancellation_point:
9601 case OMPD_ordered:
9602 case OMPD_threadprivate:
9603 case OMPD_allocate:
9604 case OMPD_task:
9605 case OMPD_simd:
9606 case OMPD_tile:
9607 case OMPD_unroll:
9608 case OMPD_sections:
9609 case OMPD_section:
9610 case OMPD_single:
9611 case OMPD_master:
9612 case OMPD_critical:
9613 case OMPD_taskyield:
9614 case OMPD_barrier:
9615 case OMPD_taskwait:
9616 case OMPD_taskgroup:
9617 case OMPD_atomic:
9618 case OMPD_flush:
9619 case OMPD_depobj:
9620 case OMPD_scan:
9621 case OMPD_teams:
9622 case OMPD_target_data:
9623 case OMPD_target_exit_data:
9624 case OMPD_target_enter_data:
9625 case OMPD_distribute:
9626 case OMPD_distribute_simd:
9627 case OMPD_distribute_parallel_for:
9628 case OMPD_distribute_parallel_for_simd:
9629 case OMPD_teams_distribute:
9630 case OMPD_teams_distribute_simd:
9631 case OMPD_teams_distribute_parallel_for:
9632 case OMPD_teams_distribute_parallel_for_simd:
9633 case OMPD_target_update:
9634 case OMPD_declare_simd:
9635 case OMPD_declare_variant:
9636 case OMPD_begin_declare_variant:
9637 case OMPD_end_declare_variant:
9638 case OMPD_declare_target:
9639 case OMPD_end_declare_target:
9640 case OMPD_declare_reduction:
9641 case OMPD_declare_mapper:
9642 case OMPD_taskloop:
9643 case OMPD_taskloop_simd:
9644 case OMPD_master_taskloop:
9645 case OMPD_master_taskloop_simd:
9646 case OMPD_parallel_master_taskloop:
9647 case OMPD_parallel_master_taskloop_simd:
9648 case OMPD_requires:
9649 case OMPD_metadirective:
9650 case OMPD_unknown:
9651 default:
9652 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9653 }
9654 return;
9655 }
9656
9657 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9658 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9659 return;
9660
9661 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9662 return;
9663 }
9664
9665 // If this is a lambda function, look into its body.
9666 if (const auto *L = dyn_cast<LambdaExpr>(S))
9667 S = L->getBody();
9668
9669 // Keep looking for target regions recursively.
9670 for (const Stmt *II : S->children())
9671 scanForTargetRegionsFunctions(II, ParentName);
9672}
9673
9674static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9675 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9676 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9677 if (!DevTy)
9678 return false;
9679 // Do not emit device_type(nohost) functions for the host.
9680 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9681 return true;
9682 // Do not emit device_type(host) functions for the device.
9683 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9684 return true;
9685 return false;
9686}
9687
9688bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9689 // If emitting code for the host, we do not process FD here. Instead we do
9690 // the normal code generation.
9691 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9692 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9693 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9694 CGM.getLangOpts().OpenMPIsTargetDevice))
9695 return true;
9696 return false;
9697 }
9698
9699 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9700 // Try to detect target regions in the function.
9701 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9702 StringRef Name = CGM.getMangledName(GD);
9703 scanForTargetRegionsFunctions(FD->getBody(), Name);
9704 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9705 CGM.getLangOpts().OpenMPIsTargetDevice))
9706 return true;
9707 }
9708
9709 // Do not emit the function if it is not marked as declare target.
9710 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9711 AlreadyEmittedTargetDecls.count(VD) == 0;
9712}
9713
9714bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9715 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9716 CGM.getLangOpts().OpenMPIsTargetDevice))
9717 return true;
9718
9719 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9720 return false;
9721
9722 // Check if there are Ctors/Dtors in this declaration and look for target
9723 // regions in it. We use the complete variant to produce the kernel name
9724 // mangling.
9725 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9726 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9727 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9728 StringRef ParentName =
9729 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9730 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9731 }
9732 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9733 StringRef ParentName =
9734 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9735 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9736 }
9737 }
9738
9739 // Do not emit the variable if it is not marked as declare target.
9740 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9741 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9742 cast<VarDecl>(GD.getDecl()));
9743 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9744 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9745 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9746 HasRequiresUnifiedSharedMemory)) {
9747 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9748 return true;
9749 }
9750 return false;
9751}
9752
9753void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9754 llvm::Constant *Addr) {
9755 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9756 !CGM.getLangOpts().OpenMPIsTargetDevice)
9757 return;
9758
9759 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9760 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9761
9762 // If this is an 'extern' declaration we defer to the canonical definition and
9763 // do not emit an offloading entry.
9764 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9765 VD->hasExternalStorage())
9766 return;
9767
9768 if (!Res) {
9769 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9770 // Register non-target variables being emitted in device code (debug info
9771 // may cause this).
9772 StringRef VarName = CGM.getMangledName(VD);
9773 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9774 }
9775 return;
9776 }
9777
9778 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9779 auto LinkageForVariable = [&VD, this]() {
9780 return CGM.getLLVMLinkageVarDefinition(VD);
9781 };
9782
9783 std::vector<llvm::GlobalVariable *> GeneratedRefs;
9784 OMPBuilder.registerTargetGlobalVariable(
9785 convertCaptureClause(VD), convertDeviceClause(VD),
9786 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9787 VD->isExternallyVisible(),
9788 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9789 VD->getCanonicalDecl()->getBeginLoc()),
9790 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9791 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9792 CGM.getTypes().ConvertTypeForMem(
9793 CGM.getContext().getPointerType(VD->getType())),
9794 Addr);
9795
9796 for (auto *ref : GeneratedRefs)
9797 CGM.addCompilerUsedGlobal(ref);
9798}
9799
9800bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9801 if (isa<FunctionDecl>(GD.getDecl()) ||
9802 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9803 return emitTargetFunctions(GD);
9804
9805 return emitTargetGlobalVariable(GD);
9806}
9807
9808void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9809 for (const VarDecl *VD : DeferredGlobalVariables) {
9810 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9811 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9812 if (!Res)
9813 continue;
9814 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9815 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9816 !HasRequiresUnifiedSharedMemory) {
9817 CGM.EmitGlobal(VD);
9818 } else {
9819 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9820 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9821 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9823 "Expected link clause or to clause with unified memory.");
9825 }
9826 }
9827}
9828
9829void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9830 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9831 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9832 " Expected target-based directive.");
9833}
9834
9835void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9836 for (const OMPClause *Clause : D->clauselists()) {
9837 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9838 HasRequiresUnifiedSharedMemory = true;
9839 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
9840 } else if (const auto *AC =
9841 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9842 switch (AC->getAtomicDefaultMemOrderKind()) {
9843 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9844 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9845 break;
9846 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9847 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9848 break;
9849 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9850 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9851 break;
9852 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9853 break;
9854 }
9855 }
9856 }
9857}
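// Example (illustrative): '#pragma omp requires
// atomic_default_mem_order(seq_cst)' sets the default ordering to
// SequentiallyConsistent for subsequently emitted atomic operations.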
9858
9859llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
9860 return RequiresAtomicOrdering;
9861}
9862
9863bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9864 LangAS &AS) {
9865 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9866 return false;
9867 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9868 switch (A->getAllocatorType()) {
9869 case OMPAllocateDeclAttr::OMPNullMemAlloc:
9870 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9871 // Not supported, fallback to the default mem space.
9872 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9873 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9874 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9875 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9876 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9877 case OMPAllocateDeclAttr::OMPConstMemAlloc:
9878 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9879 AS = LangAS::Default;
9880 return true;
9881 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9882 llvm_unreachable("Expected predefined allocator for the variables with the "
9883 "static storage.");
9884 }
9885 return false;
9886}
9887
9888bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9889 return HasRequiresUnifiedSharedMemory;
9890}
9891
9892CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9893 CodeGenModule &CGM)
9894 : CGM(CGM) {
9895 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9896 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9897 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9898 }
9899}
9900
9901CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9902 if (CGM.getLangOpts().OpenMPIsTargetDevice)
9903 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9904}
9905
9906bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9907 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
9908 return true;
9909
9910 const auto *D = cast<FunctionDecl>(GD.getDecl());
9911 // Do not emit the function if it is marked as declare target, as it was
9912 // already emitted.
9913 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9914 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9915 if (auto *F = dyn_cast_or_null<llvm::Function>(
9916 CGM.GetGlobalValue(CGM.getMangledName(GD))))
9917 return !F->isDeclaration();
9918 return false;
9919 }
9920 return true;
9921 }
9922
9923 return !AlreadyEmittedTargetDecls.insert(D).second;
9924}
9925
9926void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9927 const OMPExecutableDirective &D,
9928 SourceLocation Loc,
9929 llvm::Function *OutlinedFn,
9930 ArrayRef<llvm::Value *> CapturedVars) {
9931 if (!CGF.HaveInsertPoint())
9932 return;
9933
9934 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9935 CodeGenFunction::RunCleanupsScope Scope(CGF);
9936
9937 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9938 llvm::Value *Args[] = {
9939 RTLoc,
9940 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9941 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9942 llvm::SmallVector<llvm::Value *, 16> RealArgs;
9943 RealArgs.append(std::begin(Args), std::end(Args));
9944 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9945
9946 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
9947 CGM.getModule(), OMPRTL___kmpc_fork_teams);
9948 CGF.EmitRuntimeCall(RTLFn, RealArgs);
9949}
9950
9951void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9952 const Expr *NumTeams,
9953 const Expr *ThreadLimit,
9954 SourceLocation Loc) {
9955 if (!CGF.HaveInsertPoint())
9956 return;
9957
9958 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9959
9960 llvm::Value *NumTeamsVal =
9961 NumTeams
9962 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9963 CGF.CGM.Int32Ty, /* isSigned = */ true)
9964 : CGF.Builder.getInt32(0);
9965
9966 llvm::Value *ThreadLimitVal =
9967 ThreadLimit
9968 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9969 CGF.CGM.Int32Ty, /* isSigned = */ true)
9970 : CGF.Builder.getInt32(0);
9971
9972 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9973 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9974 ThreadLimitVal};
9975 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
9976 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
9977 PushNumTeamsArgs);
9978}
9979
9980void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
9981 const Expr *ThreadLimit,
9982 SourceLocation Loc) {
9983 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9984 llvm::Value *ThreadLimitVal =
9985 ThreadLimit
9986 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9987 CGF.CGM.Int32Ty, /* isSigned = */ true)
9988 : CGF.Builder.getInt32(0);
9989
9990 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
9991 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
9992 ThreadLimitVal};
9993 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
9994 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
9995 ThreadLimitArgs);
9996}
9997
9998void CGOpenMPRuntime::emitTargetDataCalls(
9999 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10000 const Expr *Device, const RegionCodeGenTy &CodeGen,
10001 CGOpenMPRuntime::TargetDataInfo &Info) {
10002 if (!CGF.HaveInsertPoint())
10003 return;
10004
10005 // Action used to replace the default codegen action and turn privatization
10006 // off.
10007 PrePostActionTy NoPrivAction;
10008
10009 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10010
10011 llvm::Value *IfCondVal = nullptr;
10012 if (IfCond)
10013 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10014
10015 // Emit device ID if any.
10016 llvm::Value *DeviceID = nullptr;
10017 if (Device) {
10018 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10019 CGF.Int64Ty, /*isSigned=*/true);
10020 } else {
10021 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10022 }
10023
10024 // Fill up the arrays with all the mapped variables.
10025 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10026 auto GenMapInfoCB =
10027 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10028 CGF.Builder.restoreIP(CodeGenIP);
10029 // Get map clause information.
10030 MappableExprsHandler MEHandler(D, CGF);
10031 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10032
10033 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10034 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10035 };
10036 if (CGM.getCodeGenOpts().getDebugInfo() !=
10037 llvm::codegenoptions::NoDebugInfo) {
10038 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10039 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10040 FillInfoMap);
10041 }
10042
10043 return CombinedInfo;
10044 };
10045 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10046 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10047 CGF.Builder.restoreIP(CodeGenIP);
10048 switch (BodyGenType) {
10049 case BodyGenTy::Priv:
10050 if (!Info.CaptureDeviceAddrMap.empty())
10051 CodeGen(CGF);
10052 break;
10053 case BodyGenTy::DupNoPriv:
10054 if (!Info.CaptureDeviceAddrMap.empty()) {
10055 CodeGen.setAction(NoPrivAction);
10056 CodeGen(CGF);
10057 }
10058 break;
10059 case BodyGenTy::NoPriv:
10060 if (Info.CaptureDeviceAddrMap.empty()) {
10061 CodeGen.setAction(NoPrivAction);
10062 CodeGen(CGF);
10063 }
10064 break;
10065 }
10066 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10067 CGF.Builder.GetInsertPoint());
10068 };
10069
10070 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10071 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10072 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10073 }
10074 };
10075
10076 auto CustomMapperCB = [&](unsigned int I) {
10077 llvm::Value *MFunc = nullptr;
10078 if (CombinedInfo.Mappers[I]) {
10079 Info.HasMapper = true;
10080 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10081 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10082 }
10083 return MFunc;
10084 };
10085
10086 // Source location for the ident struct
10087 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10088
10089 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10090 CGF.AllocaInsertPt->getIterator());
10091 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10092 CGF.Builder.GetInsertPoint());
10093 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10094 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10095 cantFail(OMPBuilder.createTargetData(
10096 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10097 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10098 CGF.Builder.restoreIP(AfterIP);
10099}
10100
10101void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10102 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10103 const Expr *Device) {
10104 if (!CGF.HaveInsertPoint())
10105 return;
10106
10107 assert((isa<OMPTargetEnterDataDirective>(D) ||
10108 isa<OMPTargetExitDataDirective>(D) ||
10109 isa<OMPTargetUpdateDirective>(D)) &&
10110 "Expecting either target enter, exit data, or update directives.");
10111
10112 CodeGenFunction::OMPTargetDataInfo InputInfo;
10113 llvm::Value *MapTypesArray = nullptr;
10114 llvm::Value *MapNamesArray = nullptr;
10115 // Generate the code for the opening of the data environment.
10116 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10117 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10118 // Emit device ID if any.
10119 llvm::Value *DeviceID = nullptr;
10120 if (Device) {
10121 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10122 CGF.Int64Ty, /*isSigned=*/true);
10123 } else {
10124 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10125 }
10126
10127 // Emit the number of elements in the offloading arrays.
10128 llvm::Constant *PointerNum =
10129 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10130
10131 // Source location for the ident struct
10132 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10133
10134 SmallVector<llvm::Value *, 13> OffloadingArgs(
10135 {RTLoc, DeviceID, PointerNum,
10136 InputInfo.BasePointersArray.emitRawPointer(CGF),
10137 InputInfo.PointersArray.emitRawPointer(CGF),
10138 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10139 InputInfo.MappersArray.emitRawPointer(CGF)});
10140
10141 // Select the right runtime function call for each standalone
10142 // directive.
10143 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10144 RuntimeFunction RTLFn;
10145 switch (D.getDirectiveKind()) {
10146 case OMPD_target_enter_data:
10147 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10148 : OMPRTL___tgt_target_data_begin_mapper;
10149 break;
10150 case OMPD_target_exit_data:
10151 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10152 : OMPRTL___tgt_target_data_end_mapper;
10153 break;
10154 case OMPD_target_update:
10155 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10156 : OMPRTL___tgt_target_data_update_mapper;
10157 break;
10158 case OMPD_parallel:
10159 case OMPD_for:
10160 case OMPD_parallel_for:
10161 case OMPD_parallel_master:
10162 case OMPD_parallel_sections:
10163 case OMPD_for_simd:
10164 case OMPD_parallel_for_simd:
10165 case OMPD_cancel:
10166 case OMPD_cancellation_point:
10167 case OMPD_ordered:
10168 case OMPD_threadprivate:
10169 case OMPD_allocate:
10170 case OMPD_task:
10171 case OMPD_simd:
10172 case OMPD_tile:
10173 case OMPD_unroll:
10174 case OMPD_sections:
10175 case OMPD_section:
10176 case OMPD_single:
10177 case OMPD_master:
10178 case OMPD_critical:
10179 case OMPD_taskyield:
10180 case OMPD_barrier:
10181 case OMPD_taskwait:
10182 case OMPD_taskgroup:
10183 case OMPD_atomic:
10184 case OMPD_flush:
10185 case OMPD_depobj:
10186 case OMPD_scan:
10187 case OMPD_teams:
10188 case OMPD_target_data:
10189 case OMPD_distribute:
10190 case OMPD_distribute_simd:
10191 case OMPD_distribute_parallel_for:
10192 case OMPD_distribute_parallel_for_simd:
10193 case OMPD_teams_distribute:
10194 case OMPD_teams_distribute_simd:
10195 case OMPD_teams_distribute_parallel_for:
10196 case OMPD_teams_distribute_parallel_for_simd:
10197 case OMPD_declare_simd:
10198 case OMPD_declare_variant:
10199 case OMPD_begin_declare_variant:
10200 case OMPD_end_declare_variant:
10201 case OMPD_declare_target:
10202 case OMPD_end_declare_target:
10203 case OMPD_declare_reduction:
10204 case OMPD_declare_mapper:
10205 case OMPD_taskloop:
10206 case OMPD_taskloop_simd:
10207 case OMPD_master_taskloop:
10208 case OMPD_master_taskloop_simd:
10209 case OMPD_parallel_master_taskloop:
10210 case OMPD_parallel_master_taskloop_simd:
10211 case OMPD_target:
10212 case OMPD_target_simd:
10213 case OMPD_target_teams_distribute:
10214 case OMPD_target_teams_distribute_simd:
10215 case OMPD_target_teams_distribute_parallel_for:
10216 case OMPD_target_teams_distribute_parallel_for_simd:
10217 case OMPD_target_teams:
10218 case OMPD_target_parallel:
10219 case OMPD_target_parallel_for:
10220 case OMPD_target_parallel_for_simd:
10221 case OMPD_requires:
10222 case OMPD_metadirective:
10223 case OMPD_unknown:
10224 default:
10225 llvm_unreachable("Unexpected standalone target data directive.");
10226 break;
10227 }
10228 if (HasNowait) {
10229 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10230 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10231 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10232 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10233 }
10234 CGF.EmitRuntimeCall(
10235 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10236 OffloadingArgs);
10237 };
10238
10239 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10240 &MapNamesArray](CodeGenFunction &CGF,
10241 PrePostActionTy &) {
10242 // Fill up the arrays with all the mapped variables.
10243 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10244 CGOpenMPRuntime::TargetDataInfo Info;
10245 MappableExprsHandler MEHandler(D, CGF);
10246 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10247 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10248 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10249
10250 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10251 D.hasClausesOfKind<OMPNowaitClause>();
10252
10253 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10254 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10255 CGF.VoidPtrTy, CGM.getPointerAlign());
10256 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10257 CGM.getPointerAlign());
10258 InputInfo.SizesArray =
10259 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10260 InputInfo.MappersArray =
10261 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10262 MapTypesArray = Info.RTArgs.MapTypesArray;
10263 MapNamesArray = Info.RTArgs.MapNamesArray;
10264 if (RequiresOuterTask)
10265 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10266 else
10267 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10268 };
10269
10270 if (IfCond) {
10271 emitIfClause(CGF, IfCond, TargetThenGen,
10272 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10273 } else {
10274 RegionCodeGenTy ThenRCG(TargetThenGen);
10275 ThenRCG(CGF);
10276 }
10277}
10278
10279namespace {
10280 /// Kind of parameter in a function with 'declare simd' directive.
10281enum ParamKindTy {
10282 Linear,
10283 LinearRef,
10284 LinearUVal,
10285 LinearVal,
10286 Uniform,
10287 Vector,
10288};
10289/// Attribute set of the parameter.
10290struct ParamAttrTy {
10291 ParamKindTy Kind = Vector;
10292 llvm::APSInt StrideOrArg;
10293 llvm::APSInt Alignment;
10294 bool HasVarStride = false;
10295};
10296} // namespace
10297
10298static unsigned evaluateCDTSize(const FunctionDecl *FD,
10299 ArrayRef<ParamAttrTy> ParamAttrs) {
10300 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10301 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10302 // of that clause. The VLEN value must be a power of 2.
10303 // Otherwise, the notion of the function's "characteristic data type" (CDT)
10304 // is used to compute the vector length.
10305 // CDT is defined in the following order:
10306 // a) For non-void function, the CDT is the return type.
10307 // b) If the function has any non-uniform, non-linear parameters, then the
10308 // CDT is the type of the first such parameter.
10309 // c) If the CDT determined by a) or b) above is struct, union, or class
10310 // type which is pass-by-value (except for the type that maps to the
10311 // built-in complex data type), the characteristic data type is int.
10312 // d) If none of the above three cases is applicable, the CDT is int.
10313 // The VLEN is then determined based on the CDT and the size of vector
10314 // register of that ISA for which current vector version is generated. The
10315 // VLEN is computed using the formula below:
10316 // VLEN = sizeof(vector_register) / sizeof(CDT),
10317 // where the vector register size is specified in section 3.2.1 Registers
10318 // and the Stack Frame of the original AMD64 ABI document.
10319 QualType RetType = FD->getReturnType();
10320 if (RetType.isNull())
10321 return 0;
10322 ASTContext &C = FD->getASTContext();
10323 QualType CDT;
10324 if (!RetType.isNull() && !RetType->isVoidType()) {
10325 CDT = RetType;
10326 } else {
10327 unsigned Offset = 0;
10328 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10329 if (ParamAttrs[Offset].Kind == Vector)
10330 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10331 ++Offset;
10332 }
10333 if (CDT.isNull()) {
10334 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10335 if (ParamAttrs[I + Offset].Kind == Vector) {
10336 CDT = FD->getParamDecl(I)->getType();
10337 break;
10338 }
10339 }
10340 }
10341 }
10342 if (CDT.isNull())
10343 CDT = C.IntTy;
10344 CDT = CDT->getCanonicalTypeUnqualified();
10345 if (CDT->isRecordType() || CDT->isUnionType())
10346 CDT = C.IntTy;
10347 return C.getTypeSize(CDT);
10348}
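// Worked example (illustrative): for 'double foo(double x)' without a
// simdlen clause, the CDT is 'double' (64 bits), so a 128-bit SSE variant
// gets VLEN = 128 / 64 = 2.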
10349
10350/// Mangle the parameter part of the vector function name according to the
10351/// parameters' OpenMP classification. The mangling function is defined in
10352/// section 4.5 of the AAVFABI(2021Q1).
10353static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10354 SmallString<256> Buffer;
10355 llvm::raw_svector_ostream Out(Buffer);
10356 for (const auto &ParamAttr : ParamAttrs) {
10357 switch (ParamAttr.Kind) {
10358 case Linear:
10359 Out << 'l';
10360 break;
10361 case LinearRef:
10362 Out << 'R';
10363 break;
10364 case LinearUVal:
10365 Out << 'U';
10366 break;
10367 case LinearVal:
10368 Out << 'L';
10369 break;
10370 case Uniform:
10371 Out << 'u';
10372 break;
10373 case Vector:
10374 Out << 'v';
10375 break;
10376 }
10377 if (ParamAttr.HasVarStride)
10378 Out << "s" << ParamAttr.StrideOrArg;
10379 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10380 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10381 // Don't print the step value if it is not present or if it is
10382 // equal to 1.
10383 if (ParamAttr.StrideOrArg < 0)
10384 Out << 'n' << -ParamAttr.StrideOrArg;
10385 else if (ParamAttr.StrideOrArg != 1)
10386 Out << ParamAttr.StrideOrArg;
10387 }
10388
10389 if (!!ParamAttr.Alignment)
10390 Out << 'a' << ParamAttr.Alignment;
10391 }
10392
10393 return std::string(Out.str());
10394}
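// Example (illustrative): for 'void foo(int *a, int b, float c)' declared
// with '#pragma omp declare simd uniform(a) linear(b:2)', the parameters
// mangle as "ul2v": 'u' (uniform a), 'l2' (linear b, step 2), 'v' (vector c).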
10395
10396static void
10397emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10398 const llvm::APSInt &VLENVal,
10399 ArrayRef<ParamAttrTy> ParamAttrs,
10400 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10401 struct ISADataTy {
10402 char ISA;
10403 unsigned VecRegSize;
10404 };
10405 ISADataTy ISAData[] = {
10406 {
10407 'b', 128
10408 }, // SSE
10409 {
10410 'c', 256
10411 }, // AVX
10412 {
10413 'd', 256
10414 }, // AVX2
10415 {
10416 'e', 512
10417 }, // AVX512
10418 };
10419 llvm::SmallVector<char, 2> Masked;
10420 switch (State) {
10421 case OMPDeclareSimdDeclAttr::BS_Undefined:
10422 Masked.push_back('N');
10423 Masked.push_back('M');
10424 break;
10425 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10426 Masked.push_back('N');
10427 break;
10428 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10429 Masked.push_back('M');
10430 break;
10431 }
10432 for (char Mask : Masked) {
10433 for (const ISADataTy &Data : ISAData) {
10434 SmallString<256> Buffer;
10435 llvm::raw_svector_ostream Out(Buffer);
10436 Out << "_ZGV" << Data.ISA << Mask;
10437 if (!VLENVal) {
10438 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10439 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10440 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10441 } else {
10442 Out << VLENVal;
10443 }
10444 Out << mangleVectorParameters(ParamAttrs);
10445 Out << '_' << Fn->getName();
10446 Fn->addFnAttr(Out.str());
10447 }
10448 }
10449}
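// Example (illustrative): for a C function 'float foo(float x)' declared
// with '#pragma omp declare simd notinbranch' and no simdlen, the SSE entry
// uses CDT 'float' (32 bits), so VLEN = 128 / 32 = 4 and the emitted
// attribute is "_ZGVbN4v_foo" (with AVX/AVX2/AVX512 variants alongside).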
10450
10451// These are the functions needed to mangle the names of the
10452// vector functions generated by the compiler, according to the rules
10453// defined in the "Vector Function ABI specifications for AArch64",
10454// available at
10455// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10456
10457/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10458static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10459 QT = QT.getCanonicalType();
10460
10461 if (QT->isVoidType())
10462 return false;
10463
10464 if (Kind == ParamKindTy::Uniform)
10465 return false;
10466
10467 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10468 return false;
10469
10470 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10471 !QT->isReferenceType())
10472 return false;
10473
10474 return true;
10475}
10476
10477/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10478static bool getAArch64PBV(QualType QT, ASTContext &C) {
10479 QT = QT.getCanonicalType();
10480 unsigned Size = C.getTypeSize(QT);
10481
10482 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10483 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10484 return false;
10485
10486 if (QT->isFloatingType())
10487 return true;
10488
10489 if (QT->isIntegerType())
10490 return true;
10491
10492 if (QT->isPointerType())
10493 return true;
10494
10495 // TODO: Add support for complex types (section 3.1.2, item 2).
10496
10497 return false;
10498}
10499
10500/// Computes the lane size (LS) of a return type or of an input parameter,
10501/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10502/// TODO: Add support for references, section 3.2.1, item 1.
10503static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10504 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10505 QualType PTy = QT.getCanonicalType()->getPointeeType();
10506 if (getAArch64PBV(PTy, C))
10507 return C.getTypeSize(PTy);
10508 }
10509 if (getAArch64PBV(QT, C))
10510 return C.getTypeSize(QT);
10511
10512 return C.getTypeSize(C.getUIntPtrType());
10513}
10514
10515// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10516// signature of the scalar function, as defined in 3.2.2 of the
10517// AAVFABI.
10518static std::tuple<unsigned, unsigned, bool>
10519getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10520 QualType RetType = FD->getReturnType().getCanonicalType();
10521
10522 ASTContext &C = FD->getASTContext();
10523
10524 bool OutputBecomesInput = false;
10525
10526 llvm::SmallVector<unsigned, 8> Sizes;
10527 if (!RetType->isVoidType()) {
10528 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10529 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10530 OutputBecomesInput = true;
10531 }
10532 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10533 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10534 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10535 }
10536
10537 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10538 // The LS of a function parameter / return value can only be a power
10539 // of 2, starting from 8 bits, up to 128.
10540 assert(llvm::all_of(Sizes,
10541 [](unsigned Size) {
10542 return Size == 8 || Size == 16 || Size == 32 ||
10543 Size == 64 || Size == 128;
10544 }) &&
10545 "Invalid size");
10546
10547 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10548 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10549 OutputBecomesInput);
10550}
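// Illustrative example (editorial, not part of the source; 'bar' is a
// hypothetical function): for
//
//   double bar(float x, short y);
//
// the lane sizes are 64 (return value), 32 (x) and 16 (y), so getNDSWDS()
// returns NDS = 16 (narrowest), WDS = 64 (widest) and
// OutputBecomesInput = false, since the return type passes by value.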
10551
10552// Function used to add the attribute. The parameter `VLEN` is
10553// templated to allow the use of "x" when targeting scalable functions
10554// for SVE.
10555template <typename T>
10556static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10557 char ISA, StringRef ParSeq,
10558 StringRef MangledName, bool OutputBecomesInput,
10559 llvm::Function *Fn) {
10560 SmallString<256> Buffer;
10561 llvm::raw_svector_ostream Out(Buffer);
10562 Out << Prefix << ISA << LMask << VLEN;
10563 if (OutputBecomesInput)
10564 Out << "v";
10565 Out << ParSeq << "_" << MangledName;
10566 Fn->addFnAttr(Out.str());
10567}
10568
10569// Helper function to generate the Advanced SIMD names depending on
10570// the value of the NDS when simdlen is not present.
10571static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10572 StringRef Prefix, char ISA,
10573 StringRef ParSeq, StringRef MangledName,
10574 bool OutputBecomesInput,
10575 llvm::Function *Fn) {
10576 switch (NDS) {
10577 case 8:
10578 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10579 OutputBecomesInput, Fn);
10580 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10581 OutputBecomesInput, Fn);
10582 break;
10583 case 16:
10584 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10585 OutputBecomesInput, Fn);
10586 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10587 OutputBecomesInput, Fn);
10588 break;
10589 case 32:
10590 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10591 OutputBecomesInput, Fn);
10592 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10593 OutputBecomesInput, Fn);
10594 break;
10595 case 64:
10596 case 128:
10597 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10598 OutputBecomesInput, Fn);
10599 break;
10600 default:
10601 llvm_unreachable("Scalar type is too wide.");
10602 }
10603}
10604
10605/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10606static void emitAArch64DeclareSimdFunction(
10607 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10608 ArrayRef<ParamAttrTy> ParamAttrs,
10609 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10610 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10611
10612 // Get basic data for building the vector signature.
10613 const auto Data = getNDSWDS(FD, ParamAttrs);
10614 const unsigned NDS = std::get<0>(Data);
10615 const unsigned WDS = std::get<1>(Data);
10616 const bool OutputBecomesInput = std::get<2>(Data);
10617
10618 // Check the values provided via `simdlen` by the user.
10619 // 1. A `simdlen(1)` doesn't produce vector signatures.
10620 if (UserVLEN == 1) {
10621 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10623 "The clause simdlen(1) has no effect when targeting aarch64.");
10624 CGM.getDiags().Report(SLoc, DiagID);
10625 return;
10626 }
10627
10628 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10629 // Advanced SIMD output.
10630 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10631 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10632 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10633 "power of 2 when targeting Advanced SIMD.");
10634 CGM.getDiags().Report(SLoc, DiagID);
10635 return;
10636 }
10637
10638 // 3. Section 3.4.1. SVE fixed length must obey the architectural
10639 // limits.
10640 if (ISA == 's' && UserVLEN != 0) {
10641 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10642 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10643 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10644 "lanes in the architectural constraints "
10645 "for SVE (min is 128-bit, max is "
10646 "2048-bit, by steps of 128-bit)");
10647 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10648 return;
10649 }
10650 }
10651
10652 // Sort out parameter sequence.
10653 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10654 StringRef Prefix = "_ZGV";
10655 // Generate simdlen from user input (if any).
10656 if (UserVLEN) {
10657 if (ISA == 's') {
10658 // SVE generates only a masked function.
10659 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10660 OutputBecomesInput, Fn);
10661 } else {
10662 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10663 // Advanced SIMD generates one or two functions, depending on
10664 // the `[not]inbranch` clause.
10665 switch (State) {
10666 case OMPDeclareSimdDeclAttr::BS_Undefined:
10667 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10668 OutputBecomesInput, Fn);
10669 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10670 OutputBecomesInput, Fn);
10671 break;
10672 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10673 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10674 OutputBecomesInput, Fn);
10675 break;
10676 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10677 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10678 OutputBecomesInput, Fn);
10679 break;
10680 }
10681 }
10682 } else {
10683 // If no user simdlen is provided, follow the AAVFABI rules for
10684 // generating the vector length.
10685 if (ISA == 's') {
10686 // SVE, section 3.4.1, item 1.
10687 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10688 OutputBecomesInput, Fn);
10689 } else {
10690 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10691 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10692 // two vector names depending on the use of the clause
10693 // `[not]inbranch`.
10694 switch (State) {
10695 case OMPDeclareSimdDeclAttr::BS_Undefined:
10696 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10697 OutputBecomesInput, Fn);
10698 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10699 OutputBecomesInput, Fn);
10700 break;
10701 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10702 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10703 OutputBecomesInput, Fn);
10704 break;
10705 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10706 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10707 OutputBecomesInput, Fn);
10708 break;
10709 }
10710 }
10711 }
10712}
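// Illustrative example (editorial, not part of the source; 'foo' is a
// hypothetical function): for
//
//   #pragma omp declare simd notinbranch
//   float foo(float x);
//
// with no simdlen, the Advanced SIMD path derives the vector lengths from
// NDS = 32 and emits "_ZGVnN2v_foo" and "_ZGVnN4v_foo" (64-bit and 128-bit
// registers), while the SVE path emits the scalable, always-masked name
// "_ZGVsMxv_foo".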
10713
10714void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10715 llvm::Function *Fn) {
10716 ASTContext &C = CGM.getContext();
10717 FD = FD->getMostRecentDecl();
10718 while (FD) {
10719 // Map params to their positions in function decl.
10720 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10721 if (isa<CXXMethodDecl>(FD))
10722 ParamPositions.try_emplace(FD, 0);
10723 unsigned ParamPos = ParamPositions.size();
10724 for (const ParmVarDecl *P : FD->parameters()) {
10725 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10726 ++ParamPos;
10727 }
10728 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10729 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10730 // Mark uniform parameters.
10731 for (const Expr *E : Attr->uniforms()) {
10732 E = E->IgnoreParenImpCasts();
10733 unsigned Pos;
10734 if (isa<CXXThisExpr>(E)) {
10735 Pos = ParamPositions[FD];
10736 } else {
10737 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10738 ->getCanonicalDecl();
10739 auto It = ParamPositions.find(PVD);
10740 assert(It != ParamPositions.end() && "Function parameter not found");
10741 Pos = It->second;
10742 }
10743 ParamAttrs[Pos].Kind = Uniform;
10744 }
10745 // Get alignment info.
10746 auto *NI = Attr->alignments_begin();
10747 for (const Expr *E : Attr->aligneds()) {
10748 E = E->IgnoreParenImpCasts();
10749 unsigned Pos;
10750 QualType ParmTy;
10751 if (isa<CXXThisExpr>(E)) {
10752 Pos = ParamPositions[FD];
10753 ParmTy = E->getType();
10754 } else {
10755 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10756 ->getCanonicalDecl();
10757 auto It = ParamPositions.find(PVD);
10758 assert(It != ParamPositions.end() && "Function parameter not found");
10759 Pos = It->second;
10760 ParmTy = PVD->getType();
10761 }
10762 ParamAttrs[Pos].Alignment =
10763 (*NI)
10764 ? (*NI)->EvaluateKnownConstInt(C)
10765 : llvm::APSInt::getUnsigned(
10766 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10767 .getQuantity());
10768 ++NI;
10769 }
10770 // Mark linear parameters.
10771 auto *SI = Attr->steps_begin();
10772 auto *MI = Attr->modifiers_begin();
10773 for (const Expr *E : Attr->linears()) {
10774 E = E->IgnoreParenImpCasts();
10775 unsigned Pos;
10776 bool IsReferenceType = false;
10777 // Rescaling factor needed to compute the linear parameter
10778 // value in the mangled name.
10779 unsigned PtrRescalingFactor = 1;
10780 if (isa<CXXThisExpr>(E)) {
10781 Pos = ParamPositions[FD];
10782 auto *P = cast<PointerType>(E->getType());
10783 PtrRescalingFactor = CGM.getContext()
10784 .getTypeSizeInChars(P->getPointeeType())
10785 .getQuantity();
10786 } else {
10787 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10788 ->getCanonicalDecl();
10789 auto It = ParamPositions.find(PVD);
10790 assert(It != ParamPositions.end() && "Function parameter not found");
10791 Pos = It->second;
10792 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10793 PtrRescalingFactor = CGM.getContext()
10794 .getTypeSizeInChars(P->getPointeeType())
10795 .getQuantity();
10796 else if (PVD->getType()->isReferenceType()) {
10797 IsReferenceType = true;
10798 PtrRescalingFactor =
10799 CGM.getContext()
10800 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
10801 .getQuantity();
10802 }
10803 }
10804 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10805 if (*MI == OMPC_LINEAR_ref)
10806 ParamAttr.Kind = LinearRef;
10807 else if (*MI == OMPC_LINEAR_uval)
10808 ParamAttr.Kind = LinearUVal;
10809 else if (IsReferenceType)
10810 ParamAttr.Kind = LinearVal;
10811 else
10812 ParamAttr.Kind = Linear;
10813 // Assuming a stride of 1, for `linear` without modifiers.
10814 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10815 if (*SI) {
10816 Expr::EvalResult Result;
10817 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10818 if (const auto *DRE =
10819 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10820 if (const auto *StridePVD =
10821 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
10822 ParamAttr.HasVarStride = true;
10823 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
10824 assert(It != ParamPositions.end() &&
10825 "Function parameter not found");
10826 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
10827 }
10828 }
10829 } else {
10830 ParamAttr.StrideOrArg = Result.Val.getInt();
10831 }
10832 }
10833 // If we are using a linear clause on a pointer, we need to
10834 // rescale the value of linear_step with the byte size of the
10835 // pointee type.
10836 if (!ParamAttr.HasVarStride &&
10837 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
10838 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10839 ++SI;
10840 ++MI;
10841 }
10842 llvm::APSInt VLENVal;
10843 SourceLocation ExprLoc;
10844 const Expr *VLENExpr = Attr->getSimdlen();
10845 if (VLENExpr) {
10846 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10847 ExprLoc = VLENExpr->getExprLoc();
10848 }
10849 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10850 if (CGM.getTriple().isX86()) {
10851 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10852 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10853 unsigned VLEN = VLENVal.getExtValue();
10854 StringRef MangledName = Fn->getName();
10855 if (CGM.getTarget().hasFeature("sve"))
10856 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10857 MangledName, 's', 128, Fn, ExprLoc);
10858 else if (CGM.getTarget().hasFeature("neon"))
10859 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10860 MangledName, 'n', 128, Fn, ExprLoc);
10861 }
10862 }
10863 FD = FD->getPreviousDecl();
10864 }
10865}
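// Illustrative example (editorial, not part of the source; 'baz', 'p' and
// 'n' are hypothetical): given
//
//   #pragma omp declare simd linear(p:2) uniform(n)
//   void baz(double *p, int n);
//
// the stride for 'p' is rescaled by sizeof(double) as in the
// pointer-rescaling logic above, so the linear token in the mangled names
// carries 16 (2 * 8 bytes), i.e. "l16"; 'n' is mangled as 'u'.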
10866
10867namespace {
10868/// Cleanup action for doacross support.
10869class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10870public:
10871 static const int DoacrossFinArgs = 2;
10872
10873private:
10874 llvm::FunctionCallee RTLFn;
10875 llvm::Value *Args[DoacrossFinArgs];
10876
10877public:
10878 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10879 ArrayRef<llvm::Value *> CallArgs)
10880 : RTLFn(RTLFn) {
10881 assert(CallArgs.size() == DoacrossFinArgs);
10882 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10883 }
10884 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10885 if (!CGF.HaveInsertPoint())
10886 return;
10887 CGF.EmitRuntimeCall(RTLFn, Args);
10888 }
10889};
10890} // namespace
10891
10892void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10893 const OMPLoopDirective &D,
10894 ArrayRef<Expr *> NumIterations) {
10895 if (!CGF.HaveInsertPoint())
10896 return;
10897
10898 ASTContext &C = CGM.getContext();
10899 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10900 RecordDecl *RD;
10901 if (KmpDimTy.isNull()) {
10902 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
10903 // kmp_int64 lo; // lower
10904 // kmp_int64 up; // upper
10905 // kmp_int64 st; // stride
10906 // };
10907 RD = C.buildImplicitRecord("kmp_dim");
10908 RD->startDefinition();
10909 addFieldToRecordDecl(C, RD, Int64Ty);
10910 addFieldToRecordDecl(C, RD, Int64Ty);
10911 addFieldToRecordDecl(C, RD, Int64Ty);
10912 RD->completeDefinition();
10913 KmpDimTy = C.getRecordType(RD);
10914 } else {
10915 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10916 }
10917 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10918 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
10919 ArraySizeModifier::Normal, 0);
10920
10921 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10922 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10923 enum { LowerFD = 0, UpperFD, StrideFD };
10924 // Fill dims with data.
10925 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10926 LValue DimsLVal = CGF.MakeAddrLValue(
10927 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10928 // dims.upper = num_iterations;
10929 LValue UpperLVal = CGF.EmitLValueForField(
10930 DimsLVal, *std::next(RD->field_begin(), UpperFD));
10931 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
10932 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
10933 Int64Ty, NumIterations[I]->getExprLoc());
10934 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10935 // dims.stride = 1;
10936 LValue StrideLVal = CGF.EmitLValueForField(
10937 DimsLVal, *std::next(RD->field_begin(), StrideFD));
10938 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10939 StrideLVal);
10940 }
10941
10942 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10943 // kmp_int32 num_dims, struct kmp_dim * dims);
10944 llvm::Value *Args[] = {
10945 emitUpdateLocation(CGF, D.getBeginLoc()),
10946 getThreadID(CGF, D.getBeginLoc()),
10947 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10948 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10949 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
10950 CGM.VoidPtrTy)};
10951
10952 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10953 CGM.getModule(), OMPRTL___kmpc_doacross_init);
10954 CGF.EmitRuntimeCall(RTLFn, Args);
10955 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10956 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10957 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10958 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
10959 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10960 llvm::ArrayRef(FiniArgs));
10961}
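// Illustrative example (editorial, not part of the source): for
//
//   #pragma omp for ordered(2)
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j) { /* ... */ }
//
// this emits a two-element kmp_dim array with up = N resp. M and st = 1 in
// each dimension, passes it to __kmpc_doacross_init(loc, gtid, 2, dims),
// and registers a cleanup that calls __kmpc_doacross_fini on region exit.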
10962
10963template <typename T>
10964static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
10965 const T *C, llvm::Value *ULoc,
10966 llvm::Value *ThreadID) {
10967 QualType Int64Ty =
10968 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10969 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10970 QualType ArrayTy = CGM.getContext().getConstantArrayType(
10971 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
10972 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10973 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10974 const Expr *CounterVal = C->getLoopData(I);
10975 assert(CounterVal);
10976 llvm::Value *CntVal = CGF.EmitScalarConversion(
10977 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10978 CounterVal->getExprLoc());
10979 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10980 /*Volatile=*/false, Int64Ty);
10981 }
10982 llvm::Value *Args[] = {
10983 ULoc, ThreadID,
10984 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
10985 llvm::FunctionCallee RTLFn;
10986 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
10987 OMPDoacrossKind<T> ODK;
10988 if (ODK.isSource(C)) {
10989 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10990 OMPRTL___kmpc_doacross_post);
10991 } else {
10992 assert(ODK.isSink(C) && "Expect sink modifier.");
10993 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10994 OMPRTL___kmpc_doacross_wait);
10995 }
10996 CGF.EmitRuntimeCall(RTLFn, Args);
10997}
10998
10999void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11000 const OMPDependClause *C) {
11001 return EmitDoacrossOrdered<OMPDependClause>(
11002 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11003 getThreadID(CGF, C->getBeginLoc()));
11004}
11005
11006void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11007 const OMPDoacrossClause *C) {
11008 return EmitDoacrossOrdered<OMPDoacrossClause>(
11009 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11010 getThreadID(CGF, C->getBeginLoc()));
11011}
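// Illustrative example (editorial, not part of the source): inside a loop
// nest annotated with ordered(2) as above,
//
//   #pragma omp ordered depend(sink: i-1, j)  // waits via __kmpc_doacross_wait
//   #pragma omp ordered depend(source)        // posts via __kmpc_doacross_post
//
// in both cases the current loop-iteration vector is stored into a
// temporary kmp_int64 array whose address is passed to the runtime call.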
11012
11013void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11014 llvm::FunctionCallee Callee,
11015 ArrayRef<llvm::Value *> Args) const {
11016 assert(Loc.isValid() && "Outlined function call location must be valid.");
11017 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11018
11019 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11020 if (Fn->doesNotThrow()) {
11021 CGF.EmitNounwindRuntimeCall(Fn, Args);
11022 return;
11023 }
11024 }
11025 CGF.EmitRuntimeCall(Callee, Args);
11026}
11027
11028void CGOpenMPRuntime::emitOutlinedFunctionCall(
11029 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11030 ArrayRef<llvm::Value *> Args) const {
11031 emitCall(CGF, Loc, OutlinedFn, Args);
11032}
11033
11035 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11036 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11038}
11039
11040Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11041 const VarDecl *NativeParam,
11042 const VarDecl *TargetParam) const {
11043 return CGF.GetAddrOfLocalVar(NativeParam);
11044}
11045
11046/// Return allocator value from expression, or return a null allocator (default
11047/// when no allocator specified).
11048static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11049 const Expr *Allocator) {
11050 llvm::Value *AllocVal;
11051 if (Allocator) {
11052 AllocVal = CGF.EmitScalarExpr(Allocator);
11053 // According to the standard, the original allocator type is an enum
11054 // (integer). Convert it to a pointer type, if required.
11055 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11056 CGF.getContext().VoidPtrTy,
11057 Allocator->getExprLoc());
11058 } else {
11059 // If no allocator is specified, it defaults to the null allocator.
11060 AllocVal = llvm::Constant::getNullValue(
11061 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11062 }
11063 return AllocVal;
11064}
11065
11066/// Return the alignment from an allocate directive if present.
11067static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11068 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11069
11070 if (!AllocateAlignment)
11071 return nullptr;
11072
11073 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11074}
11075
11076Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11077 const VarDecl *VD) {
11078 if (!VD)
11079 return Address::invalid();
11080 Address UntiedAddr = Address::invalid();
11081 Address UntiedRealAddr = Address::invalid();
11082 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11083 if (It != FunctionToUntiedTaskStackMap.end()) {
11084 const UntiedLocalVarsAddressesMap &UntiedData =
11085 UntiedLocalVarsStack[It->second];
11086 auto I = UntiedData.find(VD);
11087 if (I != UntiedData.end()) {
11088 UntiedAddr = I->second.first;
11089 UntiedRealAddr = I->second.second;
11090 }
11091 }
11092 const VarDecl *CVD = VD->getCanonicalDecl();
11093 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11094 // Use the default allocation.
11095 if (!isAllocatableDecl(VD))
11096 return UntiedAddr;
11097 llvm::Value *Size;
11098 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11099 if (CVD->getType()->isVariablyModifiedType()) {
11100 Size = CGF.getTypeSize(CVD->getType());
11101 // Align the size: ((size + align - 1) / align) * align
11102 Size = CGF.Builder.CreateNUWAdd(
11103 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11104 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11105 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11106 } else {
11107 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11108 Size = CGM.getSize(Sz.alignTo(Align));
11109 }
11110 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11111 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11112 const Expr *Allocator = AA->getAllocator();
11113 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11114 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11115 SmallVector<llvm::Value *, 4> Args;
11116 Args.push_back(ThreadID);
11117 if (Alignment)
11118 Args.push_back(Alignment);
11119 Args.push_back(Size);
11120 Args.push_back(AllocVal);
11121 llvm::omp::RuntimeFunction FnID =
11122 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11123 llvm::Value *Addr = CGF.EmitRuntimeCall(
11124 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11125 getName({CVD->getName(), ".void.addr"}));
11126 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11127 CGM.getModule(), OMPRTL___kmpc_free);
11128 QualType Ty = CVD->getType();
11129 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11130 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11131 if (UntiedAddr.isValid())
11132 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11133
11134 // Cleanup action for allocate support.
11135 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11136 llvm::FunctionCallee RTLFn;
11137 SourceLocation::UIntTy LocEncoding;
11138 Address Addr;
11139 const Expr *AllocExpr;
11140
11141 public:
11142 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11143 SourceLocation::UIntTy LocEncoding, Address Addr,
11144 const Expr *AllocExpr)
11145 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11146 AllocExpr(AllocExpr) {}
11147 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11148 if (!CGF.HaveInsertPoint())
11149 return;
11150 llvm::Value *Args[3];
11151 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11152 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11153 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11154 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11155 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11156 Args[2] = AllocVal;
11157 CGF.EmitRuntimeCall(RTLFn, Args);
11158 }
11159 };
11160 Address VDAddr =
11161 UntiedRealAddr.isValid()
11162 ? UntiedRealAddr
11163 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11164 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11165 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11166 VDAddr, Allocator);
11167 if (UntiedRealAddr.isValid())
11168 if (auto *Region =
11169 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11170 Region->emitUntiedSwitch(CGF);
11171 return VDAddr;
11172 }
11173 return UntiedAddr;
11174}
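// Illustrative example (editorial, not part of the source; 'x' is a
// hypothetical local variable): for
//
//   double x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc) align(64)
//
// uses of 'x' are redirected to memory obtained from
// __kmpc_aligned_alloc(gtid, 64, sizeof(double), allocator) (or plain
// __kmpc_alloc when no alignment is requested), with a matching
// __kmpc_free pushed as a cleanup for the end of the scope.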
11175
11177 const VarDecl *VD) const {
11178 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11179 if (It == FunctionToUntiedTaskStackMap.end())
11180 return false;
11181 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11182}
11183
11184CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11185 CodeGenModule &CGM, const OMPLoopDirective &S)
11186 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11187 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11188 if (!NeedToPush)
11189 return;
11190 NontemporalDeclsSet &DS =
11191 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11192 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11193 for (const Stmt *Ref : C->private_refs()) {
11194 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11195 const ValueDecl *VD;
11196 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11197 VD = DRE->getDecl();
11198 } else {
11199 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11200 assert((ME->isImplicitCXXThis() ||
11201 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11202 "Expected member of current class.");
11203 VD = ME->getMemberDecl();
11204 }
11205 DS.insert(VD);
11206 }
11207 }
11208}
11209
11210CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11211 if (!NeedToPush)
11212 return;
11213 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11214}
11215
11216CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11217 CodeGenFunction &CGF,
11218 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11219 std::pair<Address, Address>> &LocalVars)
11220 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11221 if (!NeedToPush)
11222 return;
11223 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11224 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11225 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11226}
11227
11228CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11229 if (!NeedToPush)
11230 return;
11231 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11232}
11233
11234bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11235 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11236
11237 return llvm::any_of(
11238 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11239 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11240}
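// Illustrative example (editorial, not part of the source; 'a' is a
// hypothetical array): within
//
//   #pragma omp simd nontemporal(a)
//   for (int i = 0; i < N; ++i) a[i] = 0.0;
//
// the RAII above records 'a' on NontemporalDeclsStack, and
// isNontemporalDecl() lets the load/store emitters mark accesses to 'a'
// with nontemporal hints while the construct is active.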
11241
11242void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11243 const OMPExecutableDirective &S,
11244 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11245 const {
11246 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11247 // Vars in target/task regions must be excluded completely.
11248 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11249 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11250 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11251 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11252 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11253 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11254 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11255 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11256 }
11257 }
11258 // Exclude vars in private clauses.
11259 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11260 for (const Expr *Ref : C->varlist()) {
11261 if (!Ref->getType()->isScalarType())
11262 continue;
11263 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11264 if (!DRE)
11265 continue;
11266 NeedToCheckForLPCs.insert(DRE->getDecl());
11267 }
11268 }
11269 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11270 for (const Expr *Ref : C->varlist()) {
11271 if (!Ref->getType()->isScalarType())
11272 continue;
11273 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11274 if (!DRE)
11275 continue;
11276 NeedToCheckForLPCs.insert(DRE->getDecl());
11277 }
11278 }
11279 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11280 for (const Expr *Ref : C->varlist()) {
11281 if (!Ref->getType()->isScalarType())
11282 continue;
11283 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11284 if (!DRE)
11285 continue;
11286 NeedToCheckForLPCs.insert(DRE->getDecl());
11287 }
11288 }
11289 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11290 for (const Expr *Ref : C->varlist()) {
11291 if (!Ref->getType()->isScalarType())
11292 continue;
11293 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11294 if (!DRE)
11295 continue;
11296 NeedToCheckForLPCs.insert(DRE->getDecl());
11297 }
11298 }
11299 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11300 for (const Expr *Ref : C->varlist()) {
11301 if (!Ref->getType()->isScalarType())
11302 continue;
11303 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11304 if (!DRE)
11305 continue;
11306 NeedToCheckForLPCs.insert(DRE->getDecl());
11307 }
11308 }
11309 for (const Decl *VD : NeedToCheckForLPCs) {
11310 for (const LastprivateConditionalData &Data :
11311 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11312 if (Data.DeclToUniqueName.count(VD) > 0) {
11313 if (!Data.Disabled)
11314 NeedToAddForLPCsAsDisabled.insert(VD);
11315 break;
11316 }
11317 }
11318 }
11319}
11320
11321CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11322 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11323 : CGM(CGF.CGM),
11324 Action((CGM.getLangOpts().OpenMP >= 50 &&
11325 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11326 [](const OMPLastprivateClause *C) {
11327 return C->getKind() ==
11328 OMPC_LASTPRIVATE_conditional;
11329 }))
11330 ? ActionToDo::PushAsLastprivateConditional
11331 : ActionToDo::DoNotPush) {
11332 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11333 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11334 return;
11335 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11336 "Expected a push action.");
11337 LastprivateConditionalData &Data =
11338 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11339 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11340 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11341 continue;
11342
11343 for (const Expr *Ref : C->varlist()) {
11344 Data.DeclToUniqueName.insert(std::make_pair(
11345 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11346 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11347 }
11348 }
11349 Data.IVLVal = IVLVal;
11350 Data.Fn = CGF.CurFn;
11351}
11352
11353CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11354 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11355 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11356 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11357 if (CGM.getLangOpts().OpenMP < 50)
11358 return;
11359 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11360 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11361 if (!NeedToAddForLPCsAsDisabled.empty()) {
11362 Action = ActionToDo::DisableLastprivateConditional;
11363 LastprivateConditionalData &Data =
11364 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11365 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11366 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11367 Data.Fn = CGF.CurFn;
11368 Data.Disabled = true;
11369 }
11370}
11371
11372CGOpenMPRuntime::LastprivateConditionalRAII
11373CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11374 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11375 return LastprivateConditionalRAII(CGF, S);
11376}
11377
11378CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11379 if (CGM.getLangOpts().OpenMP < 50)
11380 return;
11381 if (Action == ActionToDo::DisableLastprivateConditional) {
11382 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11383 "Expected list of disabled private vars.");
11384 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11385 }
11386 if (Action == ActionToDo::PushAsLastprivateConditional) {
11387 assert(
11389 "Expected list of lastprivate conditional vars.");
11391 }
11392}
11393
11394Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11395 const VarDecl *VD) {
11396 ASTContext &C = CGM.getContext();
11397 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11398 QualType NewType;
11399 const FieldDecl *VDField;
11400 const FieldDecl *FiredField;
11401 LValue BaseLVal;
11402 auto VI = I->getSecond().find(VD);
11403 if (VI == I->getSecond().end()) {
11404 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11405 RD->startDefinition();
11406 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11407 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11408 RD->completeDefinition();
11409 NewType = C.getRecordType(RD);
11410 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11411 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11412 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11413 } else {
11414 NewType = std::get<0>(VI->getSecond());
11415 VDField = std::get<1>(VI->getSecond());
11416 FiredField = std::get<2>(VI->getSecond());
11417 BaseLVal = std::get<3>(VI->getSecond());
11418 }
11419 LValue FiredLVal =
11420 CGF.EmitLValueForField(BaseLVal, FiredField);
11421 CGF.EmitStoreOfScalar(
11422 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11423 FiredLVal);
11424 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11425}
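// Illustrative sketch (editorial, not part of the source) of the private
// copy built above for a lastprivate(conditional:) variable 'a':
//
//   struct lastprivate.conditional { /* decltype(a) */ a; char Fired; };
//
// 'Fired' starts at 0 and is set when an inner region assigns to 'a', so
// the enclosing construct can tell whether the private value is live.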
11426
11427namespace {
11428/// Checks if the lastprivate conditional variable is referenced in LHS.
11429class LastprivateConditionalRefChecker final
11430 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11431 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11432 const Expr *FoundE = nullptr;
11433 const Decl *FoundD = nullptr;
11434 StringRef UniqueDeclName;
11435 LValue IVLVal;
11436 llvm::Function *FoundFn = nullptr;
11438
11439public:
11440 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11441 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11442 llvm::reverse(LPM)) {
11443 auto It = D.DeclToUniqueName.find(E->getDecl());
11444 if (It == D.DeclToUniqueName.end())
11445 continue;
11446 if (D.Disabled)
11447 return false;
11448 FoundE = E;
11449 FoundD = E->getDecl()->getCanonicalDecl();
11450 UniqueDeclName = It->second;
11451 IVLVal = D.IVLVal;
11452 FoundFn = D.Fn;
11453 break;
11454 }
11455 return FoundE == E;
11456 }
11457 bool VisitMemberExpr(const MemberExpr *E) {
11458 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11459 return false;
11460 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11461 llvm::reverse(LPM)) {
11462 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11463 if (It == D.DeclToUniqueName.end())
11464 continue;
11465 if (D.Disabled)
11466 return false;
11467 FoundE = E;
11468 FoundD = E->getMemberDecl()->getCanonicalDecl();
11469 UniqueDeclName = It->second;
11470 IVLVal = D.IVLVal;
11471 FoundFn = D.Fn;
11472 break;
11473 }
11474 return FoundE == E;
11475 }
11476 bool VisitStmt(const Stmt *S) {
11477 for (const Stmt *Child : S->children()) {
11478 if (!Child)
11479 continue;
11480 if (const auto *E = dyn_cast<Expr>(Child))
11481 if (!E->isGLValue())
11482 continue;
11483 if (Visit(Child))
11484 return true;
11485 }
11486 return false;
11487 }
11488 explicit LastprivateConditionalRefChecker(
11489 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11490 : LPM(LPM) {}
11491 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11492 getFoundData() const {
11493 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11494 }
11495};
11496} // namespace
11497
11498void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11499 LValue IVLVal,
11500 StringRef UniqueDeclName,
11501 LValue LVal,
11502 SourceLocation Loc) {
11503 // Last updated loop counter for the lastprivate conditional var.
11504 // int<xx> last_iv = 0;
11505 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11506 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11507 LLIVTy, getName({UniqueDeclName, "iv"}));
11508 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11509 IVLVal.getAlignment().getAsAlign());
11510 LValue LastIVLVal =
11511 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11512
11513 // Last value of the lastprivate conditional.
11514 // decltype(priv_a) last_a;
11515 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11516 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11517 cast<llvm::GlobalVariable>(Last)->setAlignment(
11518 LVal.getAlignment().getAsAlign());
11519 LValue LastLVal =
11520 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11521
11522 // Global loop counter. Required to handle inner parallel-for regions.
11523 // iv
11524 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11525
11526 // #pragma omp critical(a)
11527 // if (last_iv <= iv) {
11528 // last_iv = iv;
11529 // last_a = priv_a;
11530 // }
11531 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11532 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11533 Action.Enter(CGF);
11534 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11535 // (last_iv <= iv) ? Check if the variable is updated and store new
11536 // value in global var.
11537 llvm::Value *CmpRes;
11538 if (IVLVal.getType()->isSignedIntegerType()) {
11539 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11540 } else {
11541 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11542 "Loop iteration variable must be integer.");
11543 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11544 }
11545 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11546 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11547 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11548 // {
11549 CGF.EmitBlock(ThenBB);
11550
11551 // last_iv = iv;
11552 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11553
11554 // last_a = priv_a;
11555 switch (CGF.getEvaluationKind(LVal.getType())) {
11556 case TEK_Scalar: {
11557 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11558 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11559 break;
11560 }
11561 case TEK_Complex: {
11562 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11563 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11564 break;
11565 }
11566 case TEK_Aggregate:
11567 llvm_unreachable(
11568 "Aggregates are not supported in lastprivate conditional.");
11569 }
11570 // }
11571 CGF.EmitBranch(ExitBB);
11572 // There is no need to emit line number for unconditional branch.
11573 (void)ApplyDebugLocation::CreateEmpty(CGF);
11574 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11575 };
11576
11577 if (CGM.getLangOpts().OpenMPSimd) {
11578 // Do not emit as a critical region as no parallel region could be emitted.
11579 RegionCodeGenTy ThenRCG(CodeGen);
11580 ThenRCG(CGF);
11581 } else {
11582 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11583 }
11584}
11585
11586void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11587 const Expr *LHS) {
11588 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11589 return;
11590 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11591 if (!Checker.Visit(LHS))
11592 return;
11593 const Expr *FoundE;
11594 const Decl *FoundD;
11595 StringRef UniqueDeclName;
11596 LValue IVLVal;
11597 llvm::Function *FoundFn;
11598 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11599 Checker.getFoundData();
11600 if (FoundFn != CGF.CurFn) {
11601 // Special codegen for inner parallel regions.
11602 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11603 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11604 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11605 "Lastprivate conditional is not found in outer region.");
11606 QualType StructTy = std::get<0>(It->getSecond());
11607 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11608 LValue PrivLVal = CGF.EmitLValue(FoundE);
11609 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11610 PrivLVal.getAddress(),
11611 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11612 CGF.ConvertTypeForMem(StructTy));
11613 LValue BaseLVal =
11614 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11615 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11616 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11617 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11618 FiredLVal, llvm::AtomicOrdering::Unordered,
11619 /*IsVolatile=*/true, /*isInit=*/false);
11620 return;
11621 }
11622
11623 // Private address of the lastprivate conditional in the current context.
11624 // priv_a
11625 LValue LVal = CGF.EmitLValue(FoundE);
11626 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11627 FoundE->getExprLoc());
11628}
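// Illustrative example (editorial, not part of the source; 'x', 'cond' and
// 'f' are hypothetical): for
//
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < N; ++i)
//     if (cond(i)) x = f(i);
//
// each assignment to 'x' funnels through the logic above: in the function
// that owns the construct it updates the global "last value"/"last iv"
// pair under a critical section, while in nested parallel regions it only
// sets the Fired flag, which the shared-variable walk below then checks.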
11629
11630void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11631 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11632 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11633 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11634 return;
11635 auto Range = llvm::reverse(LastprivateConditionalStack);
11636 auto It = llvm::find_if(
11637 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11638 if (It == Range.end() || It->Fn != CGF.CurFn)
11639 return;
11640 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11641 assert(LPCI != LastprivateConditionalToTypes.end() &&
11642 "Lastprivates must be registered already.");
11643 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11644 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11645 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11646 for (const auto &Pair : It->DeclToUniqueName) {
11647 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11648 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11649 continue;
11650 auto I = LPCI->getSecond().find(Pair.first);
11651 assert(I != LPCI->getSecond().end() &&
11652 "Lastprivate must be rehistered already.");
11653 // bool Cmp = priv_a.Fired != 0;
11654 LValue BaseLVal = std::get<3>(I->getSecond());
11655 LValue FiredLVal =
11656 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11657 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11658 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11659 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11660 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11661 // if (Cmp) {
11662 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11663 CGF.EmitBlock(ThenBB);
11664 Address Addr = CGF.GetAddrOfLocalVar(VD);
11665 LValue LVal;
11666 if (VD->getType()->isReferenceType())
11667 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11668 AlignmentSource::Decl);
11669 else
11670 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11671 AlignmentSource::Decl);
11672 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11673 D.getBeginLoc());
11675 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11676 // }
11677 }
11678}
11679
11680void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11681 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11682 SourceLocation Loc) {
11683 if (CGF.getLangOpts().OpenMP < 50)
11684 return;
11685 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11686 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11687 "Unknown lastprivate conditional variable.");
11688 StringRef UniqueName = It->second;
11689 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11690 // The variable was not updated in the region - exit.
11691 if (!GV)
11692 return;
11693 LValue LPLVal = CGF.MakeRawAddrLValue(
11694 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11695 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11696 CGF.EmitStoreOfScalar(Res, PrivLVal);
11697}
11698
11701 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11702 const RegionCodeGenTy &CodeGen) {
11703 llvm_unreachable("Not supported in SIMD-only mode");
11704}
11705
11708 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11709 const RegionCodeGenTy &CodeGen) {
11710 llvm_unreachable("Not supported in SIMD-only mode");
11711}
11712
11714 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11715 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11716 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11717 bool Tied, unsigned &NumberOfParts) {
11718 llvm_unreachable("Not supported in SIMD-only mode");
11719}
11720
11723 llvm::Function *OutlinedFn,
11724 ArrayRef<llvm::Value *> CapturedVars,
11725 const Expr *IfCond,
11726 llvm::Value *NumThreads) {
11727 llvm_unreachable("Not supported in SIMD-only mode");
11728}
11729
11731 CodeGenFunction &CGF, StringRef CriticalName,
11732 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11733 const Expr *Hint) {
11734 llvm_unreachable("Not supported in SIMD-only mode");
11735}
11736
11738 const RegionCodeGenTy &MasterOpGen,
11740 llvm_unreachable("Not supported in SIMD-only mode");
11741}
11742
11744 const RegionCodeGenTy &MasterOpGen,
11746 const Expr *Filter) {
11747 llvm_unreachable("Not supported in SIMD-only mode");
11748}
11749
11752 llvm_unreachable("Not supported in SIMD-only mode");
11753}
11754
11756 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11758 llvm_unreachable("Not supported in SIMD-only mode");
11759}
11760
11762 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11763 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11765 ArrayRef<const Expr *> AssignmentOps) {
11766 llvm_unreachable("Not supported in SIMD-only mode");
11767}
11768
11770 const RegionCodeGenTy &OrderedOpGen,
11772 bool IsThreads) {
11773 llvm_unreachable("Not supported in SIMD-only mode");
11774}
11775
11779 bool EmitChecks,
11780 bool ForceSimpleCall) {
11781 llvm_unreachable("Not supported in SIMD-only mode");
11782}
11783
11786 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11787 bool Ordered, const DispatchRTInput &DispatchValues) {
11788 llvm_unreachable("Not supported in SIMD-only mode");
11789}
11790
11793 llvm_unreachable("Not supported in SIMD-only mode");
11794}
11795
11798 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11799 llvm_unreachable("Not supported in SIMD-only mode");
11800}
11801
11804 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11805 llvm_unreachable("Not supported in SIMD-only mode");
11806}
11807
11810 unsigned IVSize,
11811 bool IVSigned) {
11812 llvm_unreachable("Not supported in SIMD-only mode");
11813}
11814
11817 OpenMPDirectiveKind DKind) {
11818 llvm_unreachable("Not supported in SIMD-only mode");
11819}
11820
11823 unsigned IVSize, bool IVSigned,
11824 Address IL, Address LB,
11825 Address UB, Address ST) {
11826 llvm_unreachable("Not supported in SIMD-only mode");
11827}
11828
11830 llvm::Value *NumThreads,
11832 llvm_unreachable("Not supported in SIMD-only mode");
11833}
11834
11836 ProcBindKind ProcBind,
11838 llvm_unreachable("Not supported in SIMD-only mode");
11839}
11840
11842 const VarDecl *VD,
11843 Address VDAddr,
11845 llvm_unreachable("Not supported in SIMD-only mode");
11846}
11847
11849 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11850 CodeGenFunction *CGF) {
11851 llvm_unreachable("Not supported in SIMD-only mode");
11852}
11853
11855 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11856 llvm_unreachable("Not supported in SIMD-only mode");
11857}
11858
11862 llvm::AtomicOrdering AO) {
11863 llvm_unreachable("Not supported in SIMD-only mode");
11864}
11865
11868 llvm::Function *TaskFunction,
11869 QualType SharedsTy, Address Shareds,
11870 const Expr *IfCond,
11871 const OMPTaskDataTy &Data) {
11872 llvm_unreachable("Not supported in SIMD-only mode");
11873}
11874
11877 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11878 const Expr *IfCond, const OMPTaskDataTy &Data) {
11879 llvm_unreachable("Not supported in SIMD-only mode");
11880}
11881
11885 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11886 assert(Options.SimpleReduction && "Only simple reduction is expected.");
11887 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11888 ReductionOps, Options);
11889}
11890
11893 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11894 llvm_unreachable("Not supported in SIMD-only mode");
11895}
11896
11899 bool IsWorksharingReduction) {
11900 llvm_unreachable("Not supported in SIMD-only mode");
11901}
11902
11905 ReductionCodeGen &RCG,
11906 unsigned N) {
11907 llvm_unreachable("Not supported in SIMD-only mode");
11908}
11909
11912 llvm::Value *ReductionsPtr,
11913 LValue SharedLVal) {
11914 llvm_unreachable("Not supported in SIMD-only mode");
11915}
11916
11919 const OMPTaskDataTy &Data) {
11920 llvm_unreachable("Not supported in SIMD-only mode");
11921}
11922
11925 OpenMPDirectiveKind CancelRegion) {
11926 llvm_unreachable("Not supported in SIMD-only mode");
11927}
11928
11930 SourceLocation Loc, const Expr *IfCond,
11931 OpenMPDirectiveKind CancelRegion) {
11932 llvm_unreachable("Not supported in SIMD-only mode");
11933}
11934
11936 const OMPExecutableDirective &D, StringRef ParentName,
11937 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11938 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11939 llvm_unreachable("Not supported in SIMD-only mode");
11940}
11941
11944 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11945 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11946 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11947 const OMPLoopDirective &D)>
11948 SizeEmitter) {
11949 llvm_unreachable("Not supported in SIMD-only mode");
11950}
11951
11953 llvm_unreachable("Not supported in SIMD-only mode");
11954}
11955
11957 llvm_unreachable("Not supported in SIMD-only mode");
11958}
11959
11961 return false;
11962}
11963
11967 llvm::Function *OutlinedFn,
11968 ArrayRef<llvm::Value *> CapturedVars) {
11969 llvm_unreachable("Not supported in SIMD-only mode");
11970}
11971
11973 const Expr *NumTeams,
11974 const Expr *ThreadLimit,
11976 llvm_unreachable("Not supported in SIMD-only mode");
11977}
11978
11980 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11981 const Expr *Device, const RegionCodeGenTy &CodeGen,
11983 llvm_unreachable("Not supported in SIMD-only mode");
11984}
11985
11987 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11988 const Expr *Device) {
11989 llvm_unreachable("Not supported in SIMD-only mode");
11990}
11991
11993 const OMPLoopDirective &D,
11994 ArrayRef<Expr *> NumIterations) {
11995 llvm_unreachable("Not supported in SIMD-only mode");
11996}
11997
11999 const OMPDependClause *C) {
12000 llvm_unreachable("Not supported in SIMD-only mode");
12001}
12002
12004 const OMPDoacrossClause *C) {
12005 llvm_unreachable("Not supported in SIMD-only mode");
12006}
12007
12008const VarDecl *
12010 const VarDecl *NativeParam) const {
12011 llvm_unreachable("Not supported in SIMD-only mode");
12012}
12013
12014Address
12016 const VarDecl *NativeParam,
12017 const VarDecl *TargetParam) const {
12018 llvm_unreachable("Not supported in SIMD-only mode");
12019}
#define V(N, I)
Definition: ASTContext.h:3453
StringRef P
#define SM(sm)
Definition: Cuda.cpp:84
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if initialization is required for the given privates (used when deciding whether a task duplication function is needed for taskloops).
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_task_affinity_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
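For illustration, a hypothetical declaration such as

  #pragma omp declare simd uniform(p) linear(i) notinbranch
  double foo(double *p, int i, double x);

yields vector variants named after the _ZGV<isa><mask><vlen><parameters>_<name> scheme of the vector function ABIs; the parameter part here would be "ulv" (uniform, linear, vector), giving e.g. _ZGVdN4ulv_foo for an unmasked 4-lane variant (the ISA letter 'd' and VLEN 4 are x86 AVX2 assumptions in this sketch and are target-dependent).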
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
SourceManager & getSourceManager()
Definition: ASTContext.h:741
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2915
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2732
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:834
CanQualType BoolTy
Definition: ASTContext.h:1161
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2918
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:210
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:5184
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3577
Attr - This represents one attribute.
Definition: Attr.h:43
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2553
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2817
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2078
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2204
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2228
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:620
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1030
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1747
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:614
base_class_range vbases()
Definition: DeclCXX.h:637
capture_const_range captures() const
Definition: DeclCXX.h:1109
ctor_range ctors() const
Definition: DeclCXX.h:682
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:2081
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:348
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3797
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3831
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1312
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3837
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3825
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3828
This captures a statement into a function.
Definition: Stmt.h:3784
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:3935
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:3905
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3888
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1438
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:3930
capture_range captures()
Definition: Stmt.h:3922
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:259
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
bool isValid() const
Definition: Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:898
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:905
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:915
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:156
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:203
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:241
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:278
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:113
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::FunctionType * Kmpc_MicroTy
The type for a microtask which gets passed to __kmpc_fork_call().
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
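For a dynamically scheduled loop the generated code is essentially a dispatch loop around this routine; a sketch in C, assuming the 32-bit signed entry points from libomp (__kmpc_dispatch_init_4 / __kmpc_dispatch_next_4):

  __kmpc_dispatch_init_4(&loc, tid, OMP_sch_dynamic_chunked, lb, ub, stride, chunk);
  kmp_int32 last, lo, hi, st;
  while (__kmpc_dispatch_next_4(&loc, tid, &last, &lo, &hi, &st)) {
    for (kmp_int32 i = lo; i <= hi; i += st)
      body(i); // hypothetical loop body
  }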
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
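For example, under the standard lowering a construct like

  #pragma omp parallel num_threads(4)
  { /* body */ }

is preceded by a push of the requested thread count (a sketch; loc stands for the emitted ident_t object and gtid for the result of __kmpc_global_thread_num):

  __kmpc_push_num_threads(&loc, gtid, 4);
  __kmpc_fork_call(&loc, /*argc=*/0, &.omp_outlined.);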
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
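The ident_t layout it populates is the classic libomp one (a sketch of the struct from kmp.h; reserved_2 may carry the extra flags mentioned under getDefaultLocationReserved2Flags below):

  typedef struct ident {
    kmp_int32 reserved_1;
    kmp_int32 flags;      // e.g. KMP_IDENT_KMPC
    kmp_int32 reserved_2;
    kmp_int32 reserved_3;
    char const *psource;  // ";file;function;line;column;;"
  } ident_t;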
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in an untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record whose address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void *shareds; /**< pointer to block of pointers to shared vars */ k...
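A sketch of the full layout this member caches, reconstructed from the libomp headers (the two kmp_cmplrdata_t slots hold the destructor thunk and/or the task priority):

  typedef struct kmp_task {
    void *shareds;               // pointer to block of pointers to shared vars
    kmp_routine_entry_t routine; // task entry point
    kmp_int32 part_id;           // part id for untied tasks
    kmp_cmplrdata_t data1;       // destructors or priority
    kmp_cmplrdata_t data2;
  } kmp_task_t;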
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
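The truncated typedef above, unflattened (reconstructed from the comment in the implementation; field and type names as in libomp):

  typedef struct kmp_task_affinity_info {
    kmp_intptr_t base_addr;
    size_t len;
    struct {
      bool flag1 : 1;
      bool flag2 : 1;
      kmp_int32 reserved : 30;
    } flags;
  } kmp_task_affinity_info_t;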
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
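The truncated struct above, unflattened (per the comment in the implementation):

  struct kmp_dim {  // loop bounds info cast to kmp_int64
    kmp_int64 lo;   // lower
    kmp_int64 up;   // upper
    kmp_int64 st;   // stride
  };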
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
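The truncated typedef above, unflattened (reconstructed from the comment in the implementation; the mtx bit serves mutexinoutset dependencies):

  typedef struct kmp_depend_info {
    kmp_intptr_t base_addr;
    size_t len;
    struct {
      bool in : 1;
      bool out : 1;
      bool mtx : 1;
    } flags;
  } kmp_depend_info_t;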
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CGCapturedStmtInfo * CapturedStmtInfo
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and store them in the VLASizeMap.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
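createBasicBlock is usually paired with EmitBranchOnBoolExpr, EmitBlock, and EmitBranch (all listed in this section); a sketch, assuming Cond is the guarding expression:
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp_if.then");
llvm::BasicBlock *EndBB = CGF.createBasicBlock("omp_if.end");
CGF.EmitBranchOnBoolExpr(Cond, ThenBB, EndBB, /*TrueCount=*/0);
CGF.EmitBlock(ThenBB);
// ... emit the guarded region ...
CGF.EmitBranch(EndBB);
CGF.EmitBlock(EndBB, /*IsFinished=*/true);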
const LangOptions & getLangOpts() const
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
llvm::Type * ConvertTypeForMem(QualType T)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
JumpDest ReturnBlock
ReturnBlock - Unified return block.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
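This is how if-clause conditions are commonly folded before falling back to a runtime branch; ThenGen and ElseGen are assumed region-codegen callables:
bool CondConstant;
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
  // The condition is a compile-time constant: emit only the live arm.
  if (CondConstant)
    ThenGen(CGF);
  else
    ElseGen(CGF);
} else {
  // Otherwise emit both arms behind a runtime branch (EmitBranchOnBoolExpr).
}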
void EmitAutoVarCleanups(const AutoVarEmission &emission)
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
CodeGenTypes & getTypes() const
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
LValue EmitStringLiteralLValue(const StringLiteral *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
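A minimal sketch of moving one scalar between two addresses with the load/store pair above; SrcAddr, DestAddr, Ty, and Loc are assumed:
llvm::Value *Val =
    CGF.EmitLoadOfScalar(SrcAddr, /*Volatile=*/false, Ty, Loc);
CGF.EmitStoreOfScalar(Val, DestAddr, /*Volatile=*/false, Ty);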
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:351
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:713
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:686
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2829
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e., a null expression of the given type.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:443
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:721
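A sketch of how a freestanding 'void ()' helper is typically materialized from the arrangement entry points above; the helper name is illustrative:
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
    FnTy, /*name=*/".omp.sketch.init", FI, Loc);
CodeGenFunction InitCGF(CGM);
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                      FunctionArgList(), Loc, Loc);
// ... emit the helper body ...
InitCGF.FinishFunction();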
A specialization of Address that requires the address to be an LLVM Constant.
Definition: Address.h:294
static ConstantAddress invalid()
Definition: Address.h:302
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:631
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:243
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
CharUnits getAlignment() const
Definition: CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition: CGValue.h:338
Address getAddress() const
Definition: CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:346
QualType getType() const
Definition: CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:335
A base class for pre- and post-actions used in advanced codegen sequences for OpenMP regions.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:77
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
bool isValid() const
Definition: Address.h:62
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the shared copy of the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression for the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
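A hedged sketch of the usual driving sequence over the members above; Shareds, Origs, Privates, ReductionOps, and the per-item PrivateAddr are assumed to come from the reduction clause:
ReductionCodeGen RedCG(Shareds, Origs, Privates, ReductionOps);
for (unsigned N = 0, E = Shareds.size(); N < E; ++N) {
  RedCG.emitSharedOrigLValue(CGF, N); // lvalues for shared/original items
  RedCG.emitAggregateType(CGF, N);    // sizes for variably-sized items
  RedCG.emitInitialization(
      CGF, N, PrivateAddr, RedCG.getSharedLValue(N).getAddress(),
      [](CodeGenFunction &) { return false; }); // no default init needed
  if (RedCG.needCleanups(N))
    RedCG.emitCleanups(CGF, N, PrivateAddr);
}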
Class that provides a way to run either the simple codegen for an OpenMP region or an advanced variant with possible pre- and post-actions.
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
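A sketch of wrapping a codegen callback in this class and invoking it:
auto &&CodeGen = [](CodeGenFunction &CGF, PrePostActionTy &Action) {
  Action.Enter(CGF); // runs the pre-action installed via setAction(), if any
  // ... emit the region body ...
};
RegionCodeGenTy RCG(CodeGen);
RCG(CGF); // operator() dispatches to the callback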
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:76
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:195
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1435
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1768
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1265
ValueDecl * getDecl()
Definition: Expr.h:1333
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
SourceLocation getEndLoc() const LLVM_READONLY
Definition: DeclBase.h:438
T * getAttr() const
Definition: DeclBase.h:576
bool hasAttrs() const
Definition: DeclBase.h:521
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:520
void addAttr(Attr *A)
Definition: DeclBase.cpp:1010
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:1082
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:562
SourceLocation getLocation() const
Definition: DeclBase.h:442
DeclContext * getDeclContext()
Definition: DeclBase.h:451
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:434
AttrVec & getAttrs()
Definition: DeclBase.h:527
bool hasAttr() const
Definition: DeclBase.h:580
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:967
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:786
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:896
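A sketch of reporting a codegen-time error through the two entry points above; the message text is illustrative:
unsigned DiagID = CGM.getDiags().getCustomDiagID(
    DiagnosticsEngine::Error,
    "cannot emit code for this OpenMP construct on the device");
CGM.getDiags().Report(Loc, DiagID);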
The return type of classify().
Definition: Expr.h:330
This represents one expression.
Definition: Expr.h:110
bool isGLValue() const
Definition: Expr.h:280
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:671
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:669
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3102
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3097
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3594
bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:276
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3958
Represents a member of a struct/union/class.
Definition: Decl.h:3033
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4555
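FieldDecl::Create participates in the record-building pattern used for runtime descriptor structs (together with startDefinition, addDecl, and completeDefinition listed elsewhere in this section); a sketch with an illustrative record name:
ASTContext &C = CGM.getContext();
RecordDecl *RD = C.buildImplicitRecord("kmp_sketch_ty");
RD->startDefinition();
QualType FieldTy = C.VoidPtrTy;
FieldDecl *FD = FieldDecl::Create(
    C, RD, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
    C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
    /*BW=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
FD->setAccess(AS_public);
RD->addDecl(FD);
RD->completeDefinition();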
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
QualType getReturnType() const
Definition: Decl.h:2720
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2649
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3623
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3702
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5406
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:980
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:577
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:573
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3319
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
bool isExternallyVisible() const
Definition: Decl.h:412
This represents clause 'affinity' in the '#pragma omp task'-based directives.
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:266
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:446
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:682
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:751
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:151
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1004
This represents clause 'map' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:831
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:417
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1173
Represents a parameter to a function.
Definition: Decl.h:1725
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3198
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
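A sketch of unpacking a PresumedLoc, as done when encoding source positions into the runtime's location strings; Loc is an assumed SourceLocation:
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
if (PLoc.isValid()) {
  const char *File = PLoc.getFilename();
  unsigned Line = PLoc.getLine();
  unsigned Col = PLoc.getColumn();
  // ... fold File:Line:Col into the ";file;function;line;col;;" form ...
}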
A (possibly-)qualified type.
Definition: Type.h:929
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: Type.h:1167
QualType withRestrict() const
Definition: Type.h:1170
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:996
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:7936
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:7976
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:8139
QualType getCanonicalType() const
Definition: Type.h:7988
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: Type.h:1531
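isDestructedType commonly gates cleanup emission; a minimal sketch, assuming Ty and PrivateAddr are in scope:
if (QualType::DestructionKind DtorKind = Ty.isDestructedType())
  CGF.pushDestroy(DtorKind, PrivateAddr, Ty); // trivial types never get here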
Represents a struct/union/class.
Definition: Decl.h:4162
field_iterator field_end() const
Definition: Decl.h:4379
field_range fields() const
Definition: Decl.h:4376
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5107
bool field_empty() const
Definition: Decl.h:4384
field_iterator field_begin() const
Definition: Decl.cpp:5095
RecordDecl * getDecl() const
Definition: Type.h:6087
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:203
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:225
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:3439
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:333
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:204
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:345
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4762
bool isUnion() const
Definition: Decl.h:3784
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1583
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1493
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isVoidType() const
Definition: Type.h:8515
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2201
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type; if this is an array type, return the array element type.
Definition: Type.h:8693
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isArrayType() const
Definition: Type.h:8263
bool isPointerType() const
Definition: Type.h:8191
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8555
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8805
bool isReferenceType() const
Definition: Type.h:8209
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isLValueReferenceType() const
Definition: Type.h:8213
QualType getCanonicalTypeInternal() const
Definition: Type.h:2989
const RecordType * getAsStructureType() const
Definition: Type.cpp:754
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:8686
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2724
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:8791
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
bool isAnyPointerType() const
Definition: Type.h:8199
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:8736
bool isRecordType() const
Definition: Type.h:8291
bool isUnionType() const
Definition: Type.cpp:704
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1924
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
Represent the declaration of a variable (in which case it is an lvalue) or a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
Represents a variable declaration or definition.
Definition: Decl.h:882
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2246
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2355
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1168
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1135
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1246
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2364
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1213
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1309
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3808
Expr * getSizeExpr() const
Definition: Type.h:3827
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
@ NotKnownNonNull
Definition: Address.h:33
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:25
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:40
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:104
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:139
const FunctionProtoType * T
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:132
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:80
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:35
@ AS_public
Definition: Specifiers.h:124
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:26
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned necessary to generated the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
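A hedged sketch of populating this struct and handing it to the runtime; it assumes the usual (IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk) constructor order, with IL/LB/UB/ST as the addresses described above, RT an assumed CGOpenMPRuntime reference, and ScheduleKind an assumed OpenMPScheduleTy:
CGOpenMPRuntime::StaticRTInput StaticInit(
    /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false,
    IL, LB, UB, ST, /*Chunk=*/nullptr);
RT.emitForStaticInit(CGF, Loc, OMPD_for, ScheduleKind, StaticInit);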
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
Extra information about a function prototype.
Definition: Type.h:5192
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:111
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition: ExprOpenMP.h:121
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:119
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:113
Data for list of allocators.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:180
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:183
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:181
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57