//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
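
// Note on the untied-task machinery above: each scheduling point stores the
// next part id through PartIDVar and re-enqueues the task, and the outlined
// body later resumes through the ".untied.jmp." switch cases. A minimal
// sketch of the control flow the action emits (pseudo code with hypothetical
// part ids, not the exact IR):
//
//   switch (*partid) {              // emitted by Enter()
//   case 0: goto untied_jmp0;       // initial entry
//   case 1: goto untied_jmp1;       // resume after first scheduling point
//   default: goto untied_done;      // nothing left to do
//   }
//   untied_jmp0:
//     ... body up to the first scheduling point ...
//     *partid = 1;                  // stored by emitUntiedSwitch()
//     __kmpc_omp_task(...);         // UntiedCodeGen re-enqueues the task
//     return;
//   untied_jmp1:
//     ... remainder of the body ...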

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
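
// These flags are combined (OR-ed) into the flags field of the ident_t that
// describes a call site. As a worked illustration of the bitmask layout (not
// an exhaustive list of accepted combinations): the implicit barrier ending a
// worksharing 'for' is tagged OMP_IDENT_BARRIER_IMPL_FOR (0x40), typically
// together with OMP_IDENT_KMPC (0x02), giving a flags value of 0x42.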

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
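
// For orientation, a minimal sketch of an ident_t constant with the field
// layout indexed above (hypothetical example values, not code emitted by this
// file verbatim):
//
//   ident_t loc = {0,                      // IdentField_Reserved_1
//                  OMP_IDENT_KMPC,         // IdentField_Flags
//                  0, 0,                   // Reserved_2, Reserved_3
//                  ";file.c;foo;4;1;;"};   // IdentField_PSource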

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
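
// Worked example of the encoding above (illustrative, not emitted verbatim):
// '#pragma omp for schedule(nonmonotonic: dynamic)' is passed to the runtime
// as OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, i.e.
// 35 | (1 << 30); the 'ordered' variant of the same schedule would use
// OMP_ord_dynamic_chunked (67) instead. The modifier bits live in the high
// bits of the same kmp_int32 that carries the schedule kind.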

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
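
// In C-like pseudo code (a sketch of the IR structure built above, not the
// exact emitted instructions), the non-UDR path of EmitOMPAggregateInit is:
//
//   T *dest = DestBegin, *end = DestBegin + NumElements;
//   if (dest == end) goto done;        // omp.arrayinit.isempty
//   do {                               // omp.arrayinit.body
//     *dest = <Init>;                  // per-element initialization
//     ++dest;                          // omp.arraycpy.dest.element
//   } while (dest != end);             // omp.arraycpy.done
//   done:;                             // omp.arrayinit.done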

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
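
// For orientation (an illustrative sketch, not code emitted verbatim): given
//   #pragma omp declare reduction(myadd : int : omp_out += omp_in)
// the combiner helper built above behaves like a function that takes
// restrict-qualified pointers for omp_out and omp_in and performs
//   *omp_out += *omp_in;
// with references to 'omp_out'/'omp_in' in the combiner expression privatized
// to the pointees of the two implicit parameters.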

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
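
// Worked example (hypothetical input, for illustration only): for a location
// at line 10, column 5 of "main.cpp" inside 'int main()', the stream above
// produces ";main.cpp;main;10;5;;". Outside any function declaration the
// middle field is simply left empty: ";main.cpp;;10;5;;".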
1365
1368 unsigned Flags, bool EmitLoc) {
1369 uint32_t SrcLocStrSize;
1370 llvm::Constant *SrcLocStr;
1371 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1372 llvm::codegenoptions::NoDebugInfo) ||
1373 Loc.isInvalid()) {
1374 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1375 } else {
1376 std::string FunctionName;
1377 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1378 FunctionName = FD->getQualifiedNameAsString();
1380 const char *FileName = PLoc.getFilename();
1381 unsigned Line = PLoc.getLine();
1382 unsigned Column = PLoc.getColumn();
1383 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1384 Column, SrcLocStrSize);
1385 }
1386 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1387 return OMPBuilder.getOrCreateIdent(
1388 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1389}
1390
1393 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1394 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1395 // the clang invariants used below might be broken.
1396 if (CGM.getLangOpts().OpenMPIRBuilder) {
1397 SmallString<128> Buffer;
1398 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1399 uint32_t SrcLocStrSize;
1400 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1401 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1402 return OMPBuilder.getOrCreateThreadID(
1403 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1404 }
1405
1406 llvm::Value *ThreadID = nullptr;
1407 // Check whether we've already cached a load of the thread id in this
1408 // function.
1409 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1410 if (I != OpenMPLocThreadIDMap.end()) {
1411 ThreadID = I->second.ThreadID;
1412 if (ThreadID != nullptr)
1413 return ThreadID;
1414 }
1415 // If exceptions are enabled, do not use parameter to avoid possible crash.
1416 if (auto *OMPRegionInfo =
1417 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1418 if (OMPRegionInfo->getThreadIDVariable()) {
1419 // Check if this an outlined function with thread id passed as argument.
1420 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1421 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1422 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1423 !CGF.getLangOpts().CXXExceptions ||
1424 CGF.Builder.GetInsertBlock() == TopBlock ||
1425 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1426 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1427 TopBlock ||
1428 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1429 CGF.Builder.GetInsertBlock()) {
1430 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1431 // If value loaded in entry block, cache it and use it everywhere in
1432 // function.
1433 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1434 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1435 Elem.second.ThreadID = ThreadID;
1436 }
1437 return ThreadID;
1438 }
1439 }
1440 }
1441
1442 // This is not an outlined function region - need to call __kmpc_int32
1443 // kmpc_global_thread_num(ident_t *loc).
1444 // Generate thread id value and cache this value for use across the
1445 // function.
1446 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447 if (!Elem.second.ServiceInsertPt)
1449 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1450 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1452 llvm::CallInst *Call = CGF.Builder.CreateCall(
1453 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1454 OMPRTL___kmpc_global_thread_num),
1455 emitUpdateLocation(CGF, Loc));
1456 Call->setCallingConv(CGF.getRuntimeCC());
1457 Elem.second.ThreadID = Call;
1458 return Call;
1459}
1460
1462 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1463 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1465 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1466 }
1467 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1469 UDRMap.erase(D);
1470 FunctionUDRMap.erase(CGF.CurFn);
1471 }
1472 auto I = FunctionUDMMap.find(CGF.CurFn);
1473 if (I != FunctionUDMMap.end()) {
1474 for(const auto *D : I->second)
1475 UDMMap.erase(D);
1476 FunctionUDMMap.erase(I);
1477 }
1480}
1481
1483 return OMPBuilder.IdentPtr;
1484}
1485
1487 if (!Kmpc_MicroTy) {
1488 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1489 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1490 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1491 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1492 }
1493 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1494}
1495
1496llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1498 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500 if (!DevTy)
1501 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502
1503 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504 case OMPDeclareTargetDeclAttr::DT_Host:
1505 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1506 break;
1507 case OMPDeclareTargetDeclAttr::DT_NoHost:
1508 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1509 break;
1510 case OMPDeclareTargetDeclAttr::DT_Any:
1511 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1512 break;
1513 default:
1514 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1515 break;
1516 }
1517}
1518
1519llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1521 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523 if (!MapType)
1524 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1528 break;
1529 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1531 break;
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1534 break;
1535 default:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1537 break;
1538 }
1539}
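// Illustrative example (not from this file): the two converters above map
// declare-target clauses to offload entry kinds, e.g.
// \code
// int x, y;
// #pragma omp declare target to(x)   // MT_To   -> OMPTargetGlobalVarEntryTo
// #pragma omp declare target link(y) // MT_Link -> OMPTargetGlobalVarEntryLink
// #pragma omp begin declare target device_type(nohost)
// void g(); // DT_NoHost -> OMPTargetDeviceClauseNoHost
// #pragma omp end declare target
// \endcode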
1540
1541static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544
1545 auto FileInfoCallBack = [&]() {
1546 SourceManager &SM = CGM.getContext().getSourceManager();
1547 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548
1549 llvm::sys::fs::UniqueID ID;
1550 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1551 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552 }
1553
1554 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555 };
1556
1557 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558}
1559
1560ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1561 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562
1563 auto LinkageForVariable = [&VD, this]() {
1564 return CGM.getLLVMLinkageVarDefinition(VD);
1565 };
1566
1567 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568
1569 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1570 CGM.getContext().getPointerType(VD->getType()));
1571 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1572 convertCaptureClause(VD), convertDeviceClause(VD),
1573 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1574 VD->isExternallyVisible(),
1575 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1576 VD->getCanonicalDecl()->getBeginLoc()),
1577 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579 LinkageForVariable);
1580
1581 if (!addr)
1582 return ConstantAddress::invalid();
1583 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584}
1585
1586llvm::Constant *
1587CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1588 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1589 !CGM.getContext().getTargetInfo().isTLSSupported());
1590 // Lookup the entry, lazily creating it if necessary.
1591 std::string Suffix = getName({"cache", ""});
1592 return OMPBuilder.getOrCreateInternalVariable(
1593 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594}
1595
1596Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1597 const VarDecl *VD,
1598 Address VDAddr,
1599 SourceLocation Loc) {
1600 if (CGM.getLangOpts().OpenMPUseTLS &&
1601 CGM.getContext().getTargetInfo().isTLSSupported())
1602 return VDAddr;
1603
1604 llvm::Type *VarTy = VDAddr.getElementType();
1605 llvm::Value *Args[] = {
1606 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1608 CGF.getTypeSize(VD->getType()),
1609 getOrCreateThreadPrivateCache(VD)};
1610 return Address(
1611 CGF.EmitRuntimeCall(
1612 OMPBuilder.getOrCreateRuntimeFunction(
1613 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614 Args),
1615 CGF.Int8Ty, VDAddr.getAlignment());
1616}
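// Illustrative example (not from this file): when TLS cannot be used, an
// access to a threadprivate variable such as
// \code
// static int counter;
// #pragma omp threadprivate(counter)
// void bump() { ++counter; }
// \endcode
// is rewritten into the __kmpc_threadprivate_cached call built above, which
// returns the address of the calling thread's private copy.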
1617
1618void CGOpenMPRuntime::emitThreadPrivateVarInit(
1619 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1622 // library.
1623 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626 OMPLoc);
1627 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628 // to register constructor/destructor for variable.
1629 llvm::Value *Args[] = {
1630 OMPLoc,
1631 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1632 Ctor, CopyCtor, Dtor};
1633 CGF.EmitRuntimeCall(
1634 OMPBuilder.getOrCreateRuntimeFunction(
1635 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1636 Args);
1637}
1638
1639llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1640 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1641 bool PerformInit, CodeGenFunction *CGF) {
1642 if (CGM.getLangOpts().OpenMPUseTLS &&
1643 CGM.getContext().getTargetInfo().isTLSSupported())
1644 return nullptr;
1645
1646 VD = VD->getDefinition(CGM.getContext());
1647 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1648 QualType ASTTy = VD->getType();
1649
1650 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1651 const Expr *Init = VD->getAnyInitializer();
1652 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1653 // Generate function that re-emits the declaration's initializer into the
1654 // threadprivate copy of the variable VD
1655 CodeGenFunction CtorCGF(CGM);
1656 FunctionArgList Args;
1657 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1658 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1659 ImplicitParamKind::Other);
1660 Args.push_back(&Dst);
1661
1662 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1663 CGM.getContext().VoidPtrTy, Args);
1664 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1665 std::string Name = getName({"__kmpc_global_ctor_", ""});
1666 llvm::Function *Fn =
1667 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1668 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1669 Args, Loc, Loc);
1670 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1671 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1672 CGM.getContext().VoidPtrTy, Dst.getLocation());
1673 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1674 VDAddr.getAlignment());
1675 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1676 /*IsInitializer=*/true);
1677 ArgVal = CtorCGF.EmitLoadOfScalar(
1678 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1679 CGM.getContext().VoidPtrTy, Dst.getLocation());
1680 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1681 CtorCGF.FinishFunction();
1682 Ctor = Fn;
1683 }
1684 if (VD->getType().isDestructedType() != QualType::DK_none) {
1685 // Generate function that emits destructor call for the threadprivate copy
1686 // of the variable VD
1687 CodeGenFunction DtorCGF(CGM);
1688 FunctionArgList Args;
1689 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1690 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1691 ImplicitParamKind::Other);
1692 Args.push_back(&Dst);
1693
1694 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1695 CGM.getContext().VoidTy, Args);
1696 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1697 std::string Name = getName({"__kmpc_global_dtor_", ""});
1698 llvm::Function *Fn =
1699 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1700 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1701 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1702 Loc, Loc);
1703 // Create a scope with an artificial location for the body of this function.
1704 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1705 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1706 DtorCGF.GetAddrOfLocalVar(&Dst),
1707 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1708 DtorCGF.emitDestroy(
1709 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1710 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1711 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1712 DtorCGF.FinishFunction();
1713 Dtor = Fn;
1714 }
1715 // Do not emit init function if it is not required.
1716 if (!Ctor && !Dtor)
1717 return nullptr;
1718
1719 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1720 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1721 /*isVarArg=*/false)
1722 ->getPointerTo();
1723 // Copying constructor for the threadprivate variable.
1724 // Must be NULL - reserved by the runtime; it currently requires that this
1725 // parameter always be NULL, otherwise it fires an assertion.
1726 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1727 if (Ctor == nullptr) {
1728 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1729 /*isVarArg=*/false)
1730 ->getPointerTo();
1731 Ctor = llvm::Constant::getNullValue(CtorTy);
1732 }
1733 if (Dtor == nullptr) {
1734 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1735 /*isVarArg=*/false)
1736 ->getPointerTo();
1737 Dtor = llvm::Constant::getNullValue(DtorTy);
1738 }
1739 if (!CGF) {
1740 auto *InitFunctionTy =
1741 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1742 std::string Name = getName({"__omp_threadprivate_init_", ""});
1743 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1744 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1745 CodeGenFunction InitCGF(CGM);
1746 FunctionArgList ArgList;
1747 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1748 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1749 Loc, Loc);
1750 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1751 InitCGF.FinishFunction();
1752 return InitFunction;
1753 }
1754 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1755 }
1756 return nullptr;
1757}
1758
1759void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1760 llvm::GlobalValue *GV) {
1761 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1762 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1763
1764 // We only need to handle active 'indirect' declare target functions.
1765 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1766 return;
1767
1768 // Get a mangled name to store the new device global in.
1769 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1770 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1771 SmallString<128> Name;
1772 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1773
1774 // We need to generate a new global to hold the address of the indirectly
1775 // called device function. Doing this allows us to keep the visibility and
1776 // linkage of the associated function unchanged while allowing the runtime to
1777 // access its value.
1778 llvm::GlobalValue *Addr = GV;
1779 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1780 Addr = new llvm::GlobalVariable(
1781 CGM.getModule(), CGM.VoidPtrTy,
1782 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1783 nullptr, llvm::GlobalValue::NotThreadLocal,
1784 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1785 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1786 }
1787
1788 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1789 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1790 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1791 llvm::GlobalValue::WeakODRLinkage);
1792}
1793
1794Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1795 QualType VarType,
1796 StringRef Name) {
1797 std::string Suffix = getName({"artificial", ""});
1798 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1799 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1800 VarLVType, Twine(Name).concat(Suffix).str());
1801 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1802 CGM.getContext().getTargetInfo().isTLSSupported()) {
1803 GAddr->setThreadLocal(/*Val=*/true);
1804 return Address(GAddr, GAddr->getValueType(),
1805 CGM.getContext().getTypeAlignInChars(VarType));
1806 }
1807 std::string CacheSuffix = getName({"cache", ""});
1808 llvm::Value *Args[] = {
1809 emitUpdateLocation(CGF, SourceLocation()),
1810 getThreadID(CGF, SourceLocation()),
1811 CGF.Builder.CreatePointerCast(GAddr, CGM.VoidPtrTy),
1812 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1813 /*isSigned=*/false),
1814 OMPBuilder.getOrCreateInternalVariable(
1815 CGM.VoidPtrPtrTy,
1816 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1817 return Address(
1818 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1819 CGF.EmitRuntimeCall(
1820 OMPBuilder.getOrCreateRuntimeFunction(
1821 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1822 Args),
1823 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1824 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1825}
1826
1827void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1828 const RegionCodeGenTy &ThenGen,
1829 const RegionCodeGenTy &ElseGen) {
1830 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1831
1832 // If the condition constant folds and can be elided, try to avoid emitting
1833 // the condition and the dead arm of the if/else.
1834 bool CondConstant;
1835 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1836 if (CondConstant)
1837 ThenGen(CGF);
1838 else
1839 ElseGen(CGF);
1840 return;
1841 }
1842
1843 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1844 // emit the conditional branch.
1845 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1846 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1847 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1848 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1849
1850 // Emit the 'then' code.
1851 CGF.EmitBlock(ThenBlock);
1852 ThenGen(CGF);
1853 CGF.EmitBranch(ContBlock);
1854 // Emit the 'else' code if present.
1855 // There is no need to emit line number for unconditional branch.
1856 (void)ApplyDebugLocation::CreateEmpty(CGF);
1857 CGF.EmitBlock(ElseBlock);
1858 ElseGen(CGF);
1859 // There is no need to emit line number for unconditional branch.
1860 (void)ApplyDebugLocation::CreateEmpty(CGF);
1861 CGF.EmitBranch(ContBlock);
1862 // Emit the continuation block for code after the if.
1863 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1864}
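// Illustrative example (not from this file): a constant if clause is folded,
// so
// \code
// void h() {
// #pragma omp parallel if(0)
//   { /* ... */ }
// }
// \endcode
// emits only the serialized (else) arm, while a non-constant condition builds
// the omp_if.then / omp_if.else / omp_if.end diamond shown above.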
1865
1866void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1867 llvm::Function *OutlinedFn,
1868 ArrayRef<llvm::Value *> CapturedVars,
1869 const Expr *IfCond,
1870 llvm::Value *NumThreads) {
1871 if (!CGF.HaveInsertPoint())
1872 return;
1873 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1874 auto &M = CGM.getModule();
1875 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1876 this](CodeGenFunction &CGF, PrePostActionTy &) {
1877 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1878 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1879 llvm::Value *Args[] = {
1880 RTLoc,
1881 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1882 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1883 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1884 RealArgs.append(std::begin(Args), std::end(Args));
1885 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1886
1887 llvm::FunctionCallee RTLFn =
1888 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1889 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1890 };
1891 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1892 this](CodeGenFunction &CGF, PrePostActionTy &) {
1893 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1894 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1895 // Build calls:
1896 // __kmpc_serialized_parallel(&Loc, GTid);
1897 llvm::Value *Args[] = {RTLoc, ThreadID};
1898 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1899 M, OMPRTL___kmpc_serialized_parallel),
1900 Args);
1901
1902 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1903 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1904 RawAddress ZeroAddrBound =
1905 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1906 /*Name=*/".bound.zero.addr");
1907 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1908 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1909 // ThreadId for serialized parallels is 0.
1910 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1911 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1912 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1913
1914 // Ensure we do not inline the function. This is trivially true for the ones
1915 // passed to __kmpc_fork_call but the ones called in serialized regions
1916 // could be inlined. This is not perfect, but it is closer to the invariant
1917 // we want, namely, every data environment starts with a new function.
1918 // TODO: We should pass the if condition to the runtime function and do the
1919 // handling there. Much cleaner code.
1920 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1921 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1922 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1923
1924 // __kmpc_end_serialized_parallel(&Loc, GTid);
1925 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1926 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1927 M, OMPRTL___kmpc_end_serialized_parallel),
1928 EndArgs);
1929 };
1930 if (IfCond) {
1931 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1932 } else {
1933 RegionCodeGenTy ThenRCG(ThenGen);
1934 ThenRCG(CGF);
1935 }
1936}
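// Illustrative expansion (a sketch assuming the default host runtime): for
// \code
// void work();
// void f(int n) {
// #pragma omp parallel if(n > 1)
//   work();
// }
// \endcode
// the 'then' arm becomes roughly
//   __kmpc_fork_call(&loc, <n captures>, (kmpc_micro).omp_outlined., <captures>...);
// and the 'else' arm calls .omp_outlined. directly, bracketed by
// __kmpc_serialized_parallel(&loc, gtid) and
// __kmpc_end_serialized_parallel(&loc, gtid), with a zeroed bound thread id.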
1937
1938// If we're inside an (outlined) parallel region, use the region info's
1939 // thread-ID variable (it is passed as the first argument of the outlined
1940 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1941 // region but in a regular serial code region, get the thread ID by calling
1942 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1943 // temporary and return the address of that temp.
1944Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1945 SourceLocation Loc) {
1946 if (auto *OMPRegionInfo =
1947 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1948 if (OMPRegionInfo->getThreadIDVariable())
1949 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1950
1951 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1952 QualType Int32Ty =
1953 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1954 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1955 CGF.EmitStoreOfScalar(ThreadID,
1956 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1957
1958 return ThreadIDTemp;
1959}
1960
1961llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1962 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1963 std::string Name = getName({Prefix, "var"});
1964 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1965}
1966
1967namespace {
1968/// Common pre(post)-action for different OpenMP constructs.
1969class CommonActionTy final : public PrePostActionTy {
1970 llvm::FunctionCallee EnterCallee;
1971 ArrayRef<llvm::Value *> EnterArgs;
1972 llvm::FunctionCallee ExitCallee;
1973 ArrayRef<llvm::Value *> ExitArgs;
1974 bool Conditional;
1975 llvm::BasicBlock *ContBlock = nullptr;
1976
1977public:
1978 CommonActionTy(llvm::FunctionCallee EnterCallee,
1979 ArrayRef<llvm::Value *> EnterArgs,
1980 llvm::FunctionCallee ExitCallee,
1981 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1982 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1983 ExitArgs(ExitArgs), Conditional(Conditional) {}
1984 void Enter(CodeGenFunction &CGF) override {
1985 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1986 if (Conditional) {
1987 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1988 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1989 ContBlock = CGF.createBasicBlock("omp_if.end");
1990 // Generate the branch (If-stmt)
1991 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1992 CGF.EmitBlock(ThenBlock);
1993 }
1994 }
1995 void Done(CodeGenFunction &CGF) {
1996 // Emit the rest of blocks/branches
1997 CGF.EmitBranch(ContBlock);
1998 CGF.EmitBlock(ContBlock, true);
1999 }
2000 void Exit(CodeGenFunction &CGF) override {
2001 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2002 }
2003};
2004} // anonymous namespace
2005
2006void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2007 StringRef CriticalName,
2008 const RegionCodeGenTy &CriticalOpGen,
2009 SourceLocation Loc, const Expr *Hint) {
2010 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2011 // CriticalOpGen();
2012 // __kmpc_end_critical(ident_t *, gtid, Lock);
2013 // Prepare arguments and build a call to __kmpc_critical
2014 if (!CGF.HaveInsertPoint())
2015 return;
2016 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2017 getCriticalRegionLock(CriticalName)};
2018 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2019 std::end(Args));
2020 if (Hint) {
2021 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2022 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2023 }
2024 CommonActionTy Action(
2025 OMPBuilder.getOrCreateRuntimeFunction(
2026 CGM.getModule(),
2027 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2028 EnterArgs,
2029 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2030 OMPRTL___kmpc_end_critical),
2031 Args);
2032 CriticalOpGen.setAction(Action);
2033 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2034}
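// Illustrative example (not from this file):
// \code
// #include <omp.h>
// void update();
// void f() {
// #pragma omp critical(lck) hint(omp_sync_hint_contended)
//   update();
// }
// \endcode
// lowers roughly to __kmpc_critical_with_hint(&loc, gtid, &<lock>, hint) ...
// __kmpc_end_critical(&loc, gtid), with <lock> being the internal
// gomp_critical_user_lck variable created by getCriticalRegionLock above.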
2035
2036void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2037 const RegionCodeGenTy &MasterOpGen,
2038 SourceLocation Loc) {
2039 if (!CGF.HaveInsertPoint())
2040 return;
2041 // if(__kmpc_master(ident_t *, gtid)) {
2042 // MasterOpGen();
2043 // __kmpc_end_master(ident_t *, gtid);
2044 // }
2045 // Prepare arguments and build a call to __kmpc_master
2046 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2047 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2048 CGM.getModule(), OMPRTL___kmpc_master),
2049 Args,
2050 OMPBuilder.getOrCreateRuntimeFunction(
2051 CGM.getModule(), OMPRTL___kmpc_end_master),
2052 Args,
2053 /*Conditional=*/true);
2054 MasterOpGen.setAction(Action);
2055 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2056 Action.Done(CGF);
2057}
2058
2059void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2060 const RegionCodeGenTy &MaskedOpGen,
2061 SourceLocation Loc, const Expr *Filter) {
2062 if (!CGF.HaveInsertPoint())
2063 return;
2064 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2065 // MaskedOpGen();
2066 // __kmpc_end_masked(ident_t *, gtid);
2067 // }
2068 // Prepare arguments and build a call to __kmpc_masked
2069 llvm::Value *FilterVal = Filter
2070 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2071 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2072 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2073 FilterVal};
2074 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2075 getThreadID(CGF, Loc)};
2076 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2077 CGM.getModule(), OMPRTL___kmpc_masked),
2078 Args,
2079 OMPBuilder.getOrCreateRuntimeFunction(
2080 CGM.getModule(), OMPRTL___kmpc_end_masked),
2081 ArgsEnd,
2082 /*Conditional=*/true);
2083 MaskedOpGen.setAction(Action);
2084 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2085 Action.Done(CGF);
2086}
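// Illustrative example (not from this file):
// \code
// void io();
// void f() {
// #pragma omp masked filter(2)
//   io();
// }
// \endcode
// becomes roughly 'if (__kmpc_masked(&loc, gtid, 2)) { io();
// __kmpc_end_masked(&loc, gtid); }'; when the filter clause is absent a zero
// filter is passed, matching the 'master' construct.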
2087
2088void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2089 SourceLocation Loc) {
2090 if (!CGF.HaveInsertPoint())
2091 return;
2092 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2093 OMPBuilder.createTaskyield(CGF.Builder);
2094 } else {
2095 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2096 llvm::Value *Args[] = {
2097 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2098 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2099 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2100 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2101 Args);
2102 }
2103
2104 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2105 Region->emitUntiedSwitch(CGF);
2106}
2107
2108void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2109 const RegionCodeGenTy &TaskgroupOpGen,
2110 SourceLocation Loc) {
2111 if (!CGF.HaveInsertPoint())
2112 return;
2113 // __kmpc_taskgroup(ident_t *, gtid);
2114 // TaskgroupOpGen();
2115 // __kmpc_end_taskgroup(ident_t *, gtid);
2116 // Prepare arguments and build a call to __kmpc_taskgroup
2117 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2118 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2119 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2120 Args,
2121 OMPBuilder.getOrCreateRuntimeFunction(
2122 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2123 Args);
2124 TaskgroupOpGen.setAction(Action);
2125 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2126}
2127
2128/// Given an array of pointers to variables, project the address of a
2129/// given variable.
2130static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2131 unsigned Index, const VarDecl *Var) {
2132 // Pull out the pointer to the variable.
2133 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2134 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2135
2136 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2137 return Address(
2138 CGF.Builder.CreateBitCast(
2139 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2140 ElemTy, CGF.getContext().getDeclAlign(Var));
2141}
2142
2143static llvm::Value *emitCopyprivateCopyFunction(
2144 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2145 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2146 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2147 SourceLocation Loc) {
2148 ASTContext &C = CGM.getContext();
2149 // void copy_func(void *LHSArg, void *RHSArg);
2150 FunctionArgList Args;
2151 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2152 ImplicitParamKind::Other);
2153 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2154 ImplicitParamKind::Other);
2155 Args.push_back(&LHSArg);
2156 Args.push_back(&RHSArg);
2157 const auto &CGFI =
2158 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2159 std::string Name =
2160 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2161 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2162 llvm::GlobalValue::InternalLinkage, Name,
2163 &CGM.getModule());
2164 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2165 Fn->setDoesNotRecurse();
2166 CodeGenFunction CGF(CGM);
2167 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2168 // Dest = (void*[n])(LHSArg);
2169 // Src = (void*[n])(RHSArg);
2170 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2171 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2172 ArgsElemType->getPointerTo()),
2173 ArgsElemType, CGF.getPointerAlign());
2174 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2175 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2176 ArgsElemType->getPointerTo()),
2177 ArgsElemType, CGF.getPointerAlign());
2178 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2179 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2180 // ...
2181 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2182 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2183 const auto *DestVar =
2184 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2185 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2186
2187 const auto *SrcVar =
2188 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2189 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2190
2191 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2192 QualType Type = VD->getType();
2193 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2194 }
2195 CGF.FinishFunction();
2196 return Fn;
2197}
2198
2199void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2200 const RegionCodeGenTy &SingleOpGen,
2201 SourceLocation Loc,
2202 ArrayRef<const Expr *> CopyprivateVars,
2203 ArrayRef<const Expr *> SrcExprs,
2204 ArrayRef<const Expr *> DstExprs,
2205 ArrayRef<const Expr *> AssignmentOps) {
2206 if (!CGF.HaveInsertPoint())
2207 return;
2208 assert(CopyprivateVars.size() == SrcExprs.size() &&
2209 CopyprivateVars.size() == DstExprs.size() &&
2210 CopyprivateVars.size() == AssignmentOps.size());
2211 ASTContext &C = CGM.getContext();
2212 // int32 did_it = 0;
2213 // if(__kmpc_single(ident_t *, gtid)) {
2214 // SingleOpGen();
2215 // __kmpc_end_single(ident_t *, gtid);
2216 // did_it = 1;
2217 // }
2218 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2219 // <copy_func>, did_it);
2220
2221 Address DidIt = Address::invalid();
2222 if (!CopyprivateVars.empty()) {
2223 // int32 did_it = 0;
2224 QualType KmpInt32Ty =
2225 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2226 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2227 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2228 }
2229 // Prepare arguments and build a call to __kmpc_single
2230 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2231 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2232 CGM.getModule(), OMPRTL___kmpc_single),
2233 Args,
2234 OMPBuilder.getOrCreateRuntimeFunction(
2235 CGM.getModule(), OMPRTL___kmpc_end_single),
2236 Args,
2237 /*Conditional=*/true);
2238 SingleOpGen.setAction(Action);
2239 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2240 if (DidIt.isValid()) {
2241 // did_it = 1;
2242 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2243 }
2244 Action.Done(CGF);
2245 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2246 // <copy_func>, did_it);
2247 if (DidIt.isValid()) {
2248 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2249 QualType CopyprivateArrayTy = C.getConstantArrayType(
2250 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2251 /*IndexTypeQuals=*/0);
2252 // Create a list of all private variables for copyprivate.
2253 Address CopyprivateList =
2254 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2255 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2256 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2257 CGF.Builder.CreateStore(
2258 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2259 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2260 CGF.VoidPtrTy),
2261 Elem);
2262 }
2263 // Build function that copies private values from the single region to all
2264 // threads in the corresponding parallel region.
2265 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2266 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2267 SrcExprs, DstExprs, AssignmentOps, Loc);
2268 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2269 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2270 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2271 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2272 llvm::Value *Args[] = {
2273 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2274 getThreadID(CGF, Loc), // i32 <gtid>
2275 BufSize, // size_t <buf_size>
2276 CL.emitRawPointer(CGF), // void *<copyprivate list>
2277 CpyFn, // void (*) (void *, void *) <copy_func>
2278 DidItVal // i32 did_it
2279 };
2280 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2281 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2282 Args);
2283 }
2284}
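// Illustrative example (not from this file): with a copyprivate clause, e.g.
// \code
// int read_input();
// void f() {
//   int v;
// #pragma omp single copyprivate(v)
//   v = read_input();
// }
// \endcode
// the executing thread stores did_it = 1 and the __kmpc_copyprivate call
// broadcasts v to the rest of the team through the generated copy_func.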
2285
2286void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2287 const RegionCodeGenTy &OrderedOpGen,
2288 SourceLocation Loc, bool IsThreads) {
2289 if (!CGF.HaveInsertPoint())
2290 return;
2291 // __kmpc_ordered(ident_t *, gtid);
2292 // OrderedOpGen();
2293 // __kmpc_end_ordered(ident_t *, gtid);
2294 // Prepare arguments and build a call to __kmpc_ordered
2295 if (IsThreads) {
2296 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2297 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2298 CGM.getModule(), OMPRTL___kmpc_ordered),
2299 Args,
2300 OMPBuilder.getOrCreateRuntimeFunction(
2301 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2302 Args);
2303 OrderedOpGen.setAction(Action);
2304 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2305 return;
2306 }
2307 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2308}
2309
2310static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2311 unsigned Flags;
2312 if (Kind == OMPD_for)
2313 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2314 else if (Kind == OMPD_sections)
2315 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2316 else if (Kind == OMPD_single)
2317 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2318 else if (Kind == OMPD_barrier)
2319 Flags = OMP_IDENT_BARRIER_EXPL;
2320 else
2321 Flags = OMP_IDENT_BARRIER_IMPL;
2322 return Flags;
2323}
2324
2325void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2326 CodeGenFunction &CGF, const OMPLoopDirective &S,
2327 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2328 // Check if the loop directive is actually a doacross loop directive. In this
2329 // case choose a schedule(static, 1).
2330 if (llvm::any_of(
2331 S.getClausesOfKind<OMPOrderedClause>(),
2332 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2333 ScheduleKind = OMPC_SCHEDULE_static;
2334 // Chunk size is 1 in this case.
2335 llvm::APInt ChunkSize(32, 1);
2336 ChunkExpr = IntegerLiteral::Create(
2337 CGF.getContext(), ChunkSize,
2338 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2339 SourceLocation());
2340 }
2341}
2342
2343void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2344 OpenMPDirectiveKind Kind, bool EmitChecks,
2345 bool ForceSimpleCall) {
2346 // Check if we should use the OMPBuilder
2347 auto *OMPRegionInfo =
2348 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2349 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2350 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2351 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2352 return;
2353 }
2354
2355 if (!CGF.HaveInsertPoint())
2356 return;
2357 // Build call __kmpc_cancel_barrier(loc, thread_id);
2358 // Build call __kmpc_barrier(loc, thread_id);
2359 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2360 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2361 // thread_id);
2362 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2363 getThreadID(CGF, Loc)};
2364 if (OMPRegionInfo) {
2365 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2366 llvm::Value *Result = CGF.EmitRuntimeCall(
2367 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2368 OMPRTL___kmpc_cancel_barrier),
2369 Args);
2370 if (EmitChecks) {
2371 // if (__kmpc_cancel_barrier()) {
2372 // exit from construct;
2373 // }
2374 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2375 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2376 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2377 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2378 CGF.EmitBlock(ExitBB);
2379 // exit from construct;
2380 CodeGenFunction::JumpDest CancelDestination =
2381 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2382 CGF.EmitBranchThroughCleanup(CancelDestination);
2383 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2384 }
2385 return;
2386 }
2387 }
2388 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2389 CGM.getModule(), OMPRTL___kmpc_barrier),
2390 Args);
2391}
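// Illustrative example (not from this file): inside a cancellable region, e.g.
// \code
// void f(int err) {
// #pragma omp parallel
//   {
//     if (err) {
// #pragma omp cancel parallel
//     }
// #pragma omp barrier
//   }
// }
// \endcode
// the barrier is emitted as __kmpc_cancel_barrier and, when checks are
// enabled, a non-zero result branches to the construct's cancellation exit as
// built above.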
2392
2393void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2394 Expr *ME, bool IsFatal) {
2395 llvm::Value *MVL =
2396 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2397 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2398 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2399 // *message)
2400 llvm::Value *Args[] = {
2401 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2402 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2403 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2404 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2405 CGM.getModule(), OMPRTL___kmpc_error),
2406 Args);
2407}
2408
2409/// Map the OpenMP loop schedule to the runtime enumeration.
2410static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2411 bool Chunked, bool Ordered) {
2412 switch (ScheduleKind) {
2413 case OMPC_SCHEDULE_static:
2414 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2415 : (Ordered ? OMP_ord_static : OMP_sch_static);
2416 case OMPC_SCHEDULE_dynamic:
2417 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2418 case OMPC_SCHEDULE_guided:
2419 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2420 case OMPC_SCHEDULE_runtime:
2421 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2422 case OMPC_SCHEDULE_auto:
2423 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2424 case OMPC_SCHEDULE_unknown:
2425 assert(!Chunked && "chunk was specified but schedule kind not known");
2426 return Ordered ? OMP_ord_static : OMP_sch_static;
2427 }
2428 llvm_unreachable("Unexpected runtime schedule");
2429}
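// Illustrative mapping (not from this file):
// \code
// void work(int);
// void f(int n) {
// #pragma omp for schedule(dynamic, 4) // -> OMP_sch_dynamic_chunked
//   for (int i = 0; i < n; ++i) work(i);
// #pragma omp for schedule(static) ordered // -> OMP_ord_static
//   for (int i = 0; i < n; ++i) work(i);
// }
// \endcode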
2430
2431/// Map the OpenMP distribute schedule to the runtime enumeration.
2432static OpenMPSchedType
2433getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2434 // only static is allowed for dist_schedule
2435 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2436}
2437
2438bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2439 bool Chunked) const {
2440 OpenMPSchedType Schedule =
2441 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2442 return Schedule == OMP_sch_static;
2443}
2444
2445bool CGOpenMPRuntime::isStaticNonchunked(
2446 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2447 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2448 return Schedule == OMP_dist_sch_static;
2449}
2450
2451bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2452 bool Chunked) const {
2453 OpenMPSchedType Schedule =
2454 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2455 return Schedule == OMP_sch_static_chunked;
2456}
2457
2458bool CGOpenMPRuntime::isStaticChunked(
2459 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2460 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2461 return Schedule == OMP_dist_sch_static_chunked;
2462}
2463
2464bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2465 OpenMPSchedType Schedule =
2466 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2467 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2468 return Schedule != OMP_sch_static;
2469}
2470
2471static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2472 OpenMPScheduleClauseModifier M1,
2473 OpenMPScheduleClauseModifier M2) {
2474 int Modifier = 0;
2475 switch (M1) {
2476 case OMPC_SCHEDULE_MODIFIER_monotonic:
2477 Modifier = OMP_sch_modifier_monotonic;
2478 break;
2479 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2480 Modifier = OMP_sch_modifier_nonmonotonic;
2481 break;
2482 case OMPC_SCHEDULE_MODIFIER_simd:
2483 if (Schedule == OMP_sch_static_chunked)
2484 Schedule = OMP_sch_static_balanced_chunked;
2485 break;
2486 case OMPC_SCHEDULE_MODIFIER_last:
2487 case OMPC_SCHEDULE_MODIFIER_unknown:
2488 break;
2489 }
2490 switch (M2) {
2491 case OMPC_SCHEDULE_MODIFIER_monotonic:
2492 Modifier = OMP_sch_modifier_monotonic;
2493 break;
2494 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2495 Modifier = OMP_sch_modifier_nonmonotonic;
2496 break;
2497 case OMPC_SCHEDULE_MODIFIER_simd:
2498 if (Schedule == OMP_sch_static_chunked)
2499 Schedule = OMP_sch_static_balanced_chunked;
2500 break;
2501 case OMPC_SCHEDULE_MODIFIER_last:
2502 case OMPC_SCHEDULE_MODIFIER_unknown:
2503 break;
2504 }
2505 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2506 // If the static schedule kind is specified or if the ordered clause is
2507 // specified, and if the nonmonotonic modifier is not specified, the effect is
2508 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2509 // modifier is specified, the effect is as if the nonmonotonic modifier is
2510 // specified.
2511 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2512 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2513 Schedule == OMP_sch_static_balanced_chunked ||
2514 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2515 Schedule == OMP_dist_sch_static_chunked ||
2516 Schedule == OMP_dist_sch_static))
2517 Modifier = OMP_sch_modifier_nonmonotonic;
2518 }
2519 return Schedule | Modifier;
2520}
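// Illustrative note (not from this file): for OpenMP >= 5.0 a plain
// schedule(dynamic) defaults to nonmonotonic, so
// \code
// void work(int);
// void f(int n) {
// #pragma omp for schedule(dynamic)
//   for (int i = 0; i < n; ++i) work(i);
// }
// \endcode
// passes OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic to the
// runtime, while schedule(monotonic: dynamic) sets the monotonic bit instead.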
2521
2522void CGOpenMPRuntime::emitForDispatchInit(
2523 CodeGenFunction &CGF, SourceLocation Loc,
2524 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2525 bool Ordered, const DispatchRTInput &DispatchValues) {
2526 if (!CGF.HaveInsertPoint())
2527 return;
2528 OpenMPSchedType Schedule = getRuntimeSchedule(
2529 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2530 assert(Ordered ||
2531 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2532 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2533 Schedule != OMP_sch_static_balanced_chunked));
2534 // Call __kmpc_dispatch_init(
2535 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2536 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2537 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2538
2539 // If the chunk was not specified in the clause, use the default value 1.
2540 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2541 : CGF.Builder.getIntN(IVSize, 1);
2542 llvm::Value *Args[] = {
2543 emitUpdateLocation(CGF, Loc),
2544 getThreadID(CGF, Loc),
2545 CGF.Builder.getInt32(addMonoNonMonoModifier(
2546 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2547 DispatchValues.LB, // Lower
2548 DispatchValues.UB, // Upper
2549 CGF.Builder.getIntN(IVSize, 1), // Stride
2550 Chunk // Chunk
2551 };
2552 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2553 Args);
2554}
2555
2556static void emitForStaticInitCall(
2557 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2558 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2559 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2560 const CGOpenMPRuntime::StaticRTInput &Values) {
2561 if (!CGF.HaveInsertPoint())
2562 return;
2563
2564 assert(!Values.Ordered);
2565 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2566 Schedule == OMP_sch_static_balanced_chunked ||
2567 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2568 Schedule == OMP_dist_sch_static ||
2569 Schedule == OMP_dist_sch_static_chunked);
2570
2571 // Call __kmpc_for_static_init(
2572 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2573 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2574 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2575 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2576 llvm::Value *Chunk = Values.Chunk;
2577 if (Chunk == nullptr) {
2578 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2579 Schedule == OMP_dist_sch_static) &&
2580 "expected static non-chunked schedule");
2581 // If the chunk was not specified in the clause, use the default value 1.
2582 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2583 } else {
2584 assert((Schedule == OMP_sch_static_chunked ||
2585 Schedule == OMP_sch_static_balanced_chunked ||
2586 Schedule == OMP_ord_static_chunked ||
2587 Schedule == OMP_dist_sch_static_chunked) &&
2588 "expected static chunked schedule");
2589 }
2590 llvm::Value *Args[] = {
2591 UpdateLocation,
2592 ThreadId,
2593 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2594 M2)), // Schedule type
2595 Values.IL.emitRawPointer(CGF), // &isLastIter
2596 Values.LB.emitRawPointer(CGF), // &LB
2597 Values.UB.emitRawPointer(CGF), // &UB
2598 Values.ST.emitRawPointer(CGF), // &Stride
2599 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2600 Chunk // Chunk
2601 };
2602 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2603}
2604
2605void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2606 SourceLocation Loc,
2607 OpenMPDirectiveKind DKind,
2608 const OpenMPScheduleTy &ScheduleKind,
2609 const StaticRTInput &Values) {
2610 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2611 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2612 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2613 "Expected loop-based or sections-based directive.");
2614 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2615 isOpenMPLoopDirective(DKind)
2616 ? OMP_IDENT_WORK_LOOP
2617 : OMP_IDENT_WORK_SECTIONS);
2618 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2619 llvm::FunctionCallee StaticInitFunction =
2620 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2621 false);
2622 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2623 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2624 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2625}
2626
2627void CGOpenMPRuntime::emitDistributeStaticInit(
2628 CodeGenFunction &CGF, SourceLocation Loc,
2629 OpenMPDistScheduleClauseKind SchedKind,
2630 const CGOpenMPRuntime::StaticRTInput &Values) {
2631 OpenMPSchedType ScheduleNum =
2632 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2633 llvm::Value *UpdatedLocation =
2634 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2635 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2636 llvm::FunctionCallee StaticInitFunction;
2637 bool isGPUDistribute =
2638 CGM.getLangOpts().OpenMPIsTargetDevice &&
2639 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2640 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2641 Values.IVSize, Values.IVSigned, isGPUDistribute);
2642
2643 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2644 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2645 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2646}
2647
2648void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2649 SourceLocation Loc,
2650 OpenMPDirectiveKind DKind) {
2651 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2652 DKind == OMPD_sections) &&
2653 "Expected distribute, for, or sections directive kind");
2654 if (!CGF.HaveInsertPoint())
2655 return;
2656 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2657 llvm::Value *Args[] = {
2658 emitUpdateLocation(CGF, Loc,
2659 isOpenMPDistributeDirective(DKind) ||
2660 (DKind == OMPD_target_teams_loop)
2661 ? OMP_IDENT_WORK_DISTRIBUTE
2662 : isOpenMPLoopDirective(DKind)
2663 ? OMP_IDENT_WORK_LOOP
2664 : OMP_IDENT_WORK_SECTIONS),
2665 getThreadID(CGF, Loc)};
2666 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2667 if (isOpenMPDistributeDirective(DKind) &&
2668 CGM.getLangOpts().OpenMPIsTargetDevice &&
2669 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2670 CGF.EmitRuntimeCall(
2671 OMPBuilder.getOrCreateRuntimeFunction(
2672 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2673 Args);
2674 else
2675 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2676 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2677 Args);
2678}
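// Illustrative sequence (a sketch, not from this file): a static worksharing
// loop is bracketed as
// \code
// // __kmpc_for_static_init_4(&loc, gtid, sched, &last, &lb, &ub, &st, 1, chunk);
// // for (i = lb; i <= ub; i += st) body(i);
// // __kmpc_for_static_fini(&loc, gtid);
// \endcode
// with the __kmpc_distribute_static_* entry points substituted for distribute
// directives when compiling AMDGCN/NVPTX device code.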
2679
2680void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2681 SourceLocation Loc,
2682 unsigned IVSize,
2683 bool IVSigned) {
2684 if (!CGF.HaveInsertPoint())
2685 return;
2686 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2687 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2688 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2689 Args);
2690}
2691
2692llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2693 SourceLocation Loc, unsigned IVSize,
2694 bool IVSigned, Address IL,
2695 Address LB, Address UB,
2696 Address ST) {
2697 // Call __kmpc_dispatch_next(
2698 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2699 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2700 // kmp_int[32|64] *p_stride);
2701 llvm::Value *Args[] = {
2702 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2703 IL.emitRawPointer(CGF), // &isLastIter
2704 LB.emitRawPointer(CGF), // &Lower
2705 UB.emitRawPointer(CGF), // &Upper
2706 ST.emitRawPointer(CGF) // &Stride
2707 };
2708 llvm::Value *Call = CGF.EmitRuntimeCall(
2709 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2710 return CGF.EmitScalarConversion(
2711 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2712 CGF.getContext().BoolTy, Loc);
2713}
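// Illustrative driver (a sketch, not from this file): a dynamically scheduled
// loop executes as
// \code
// // __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, st, chunk);
// // while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
// //   for (i = lo; i <= hi; i += st) body(i);
// \endcode
// where emitForNext above supplies the while-condition by converting the
// runtime's kmp_int32 result to bool.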
2714
2715void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2716 llvm::Value *NumThreads,
2717 SourceLocation Loc) {
2718 if (!CGF.HaveInsertPoint())
2719 return;
2720 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2721 llvm::Value *Args[] = {
2722 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2723 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2724 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2725 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2726 Args);
2727}
2728
2729void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2730 ProcBindKind ProcBind,
2731 SourceLocation Loc) {
2732 if (!CGF.HaveInsertPoint())
2733 return;
2734 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2735 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2736 llvm::Value *Args[] = {
2737 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2738 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2739 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2740 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2741 Args);
2742}
2743
2744void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2745 SourceLocation Loc, llvm::AtomicOrdering AO) {
2746 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2747 OMPBuilder.createFlush(CGF.Builder);
2748 } else {
2749 if (!CGF.HaveInsertPoint())
2750 return;
2751 // Build call void __kmpc_flush(ident_t *loc)
2752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2753 CGM.getModule(), OMPRTL___kmpc_flush),
2754 emitUpdateLocation(CGF, Loc));
2755 }
2756}
2757
2758namespace {
2759/// Indexes of fields for type kmp_task_t.
2760enum KmpTaskTFields {
2761 /// List of shared variables.
2762 KmpTaskTShareds,
2763 /// Task routine.
2764 KmpTaskTRoutine,
2765 /// Partition id for the untied tasks.
2766 KmpTaskTPartId,
2767 /// Function with call of destructors for private variables.
2768 Data1,
2769 /// Task priority.
2770 Data2,
2771 /// (Taskloops only) Lower bound.
2772 KmpTaskTLowerBound,
2773 /// (Taskloops only) Upper bound.
2774 KmpTaskTUpperBound,
2775 /// (Taskloops only) Stride.
2776 KmpTaskTStride,
2777 /// (Taskloops only) Is last iteration flag.
2778 KmpTaskTLastIter,
2779 /// (Taskloops only) Reduction data.
2780 KmpTaskTReductions,
2781};
2782} // anonymous namespace
2783
2784void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2785 // If we are in simd mode or there are no entries, we don't need to do
2786 // anything.
2787 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2788 return;
2789
2790 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2791 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2792 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2793 SourceLocation Loc;
2794 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2795 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2796 E = CGM.getContext().getSourceManager().fileinfo_end();
2797 I != E; ++I) {
2798 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2799 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2800 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2801 I->getFirst(), EntryInfo.Line, 1);
2802 break;
2803 }
2804 }
2805 }
2806 switch (Kind) {
2807 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2808 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2809 DiagnosticsEngine::Error, "Offloading entry for target region in "
2810 "%0 is incorrect: either the "
2811 "address or the ID is invalid.");
2812 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2813 } break;
2814 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2815 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2816 DiagnosticsEngine::Error, "Offloading entry for declare target "
2817 "variable %0 is incorrect: the "
2818 "address is invalid.");
2819 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2820 } break;
2821 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2822 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2823 DiagnosticsEngine::Error,
2824 "Offloading entry for declare target variable is incorrect: the "
2825 "address is invalid.");
2826 CGM.getDiags().Report(DiagID);
2827 } break;
2828 }
2829 };
2830
2831 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2832}
2833
2834void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2835 if (!KmpRoutineEntryPtrTy) {
2836 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2837 ASTContext &C = CGM.getContext();
2838 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2839 FunctionProtoType::ExtProtoInfo EPI;
2840 KmpRoutineEntryPtrQTy = C.getPointerType(
2841 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2842 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2843 }
2844}
2845
2846namespace {
2847struct PrivateHelpersTy {
2848 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2849 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2850 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2851 PrivateElemInit(PrivateElemInit) {}
2852 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2853 const Expr *OriginalRef = nullptr;
2854 const VarDecl *Original = nullptr;
2855 const VarDecl *PrivateCopy = nullptr;
2856 const VarDecl *PrivateElemInit = nullptr;
2857 bool isLocalPrivate() const {
2858 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2859 }
2860};
2861typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2862} // anonymous namespace
2863
2864static bool isAllocatableDecl(const VarDecl *VD) {
2865 const VarDecl *CVD = VD->getCanonicalDecl();
2866 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2867 return false;
2868 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2869 // Use the default allocation.
2870 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2871 !AA->getAllocator());
2872}
2873
2874static RecordDecl *
2875createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2876 if (!Privates.empty()) {
2877 ASTContext &C = CGM.getContext();
2878 // Build struct .kmp_privates_t. {
2879 // /* private vars */
2880 // };
2881 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2882 RD->startDefinition();
2883 for (const auto &Pair : Privates) {
2884 const VarDecl *VD = Pair.second.Original;
2885 QualType Type = VD->getType().getNonReferenceType();
2886 // If the private variable is a local variable with lvalue ref type,
2887 // allocate the pointer instead of the pointee type.
2888 if (Pair.second.isLocalPrivate()) {
2889 if (VD->getType()->isLValueReferenceType())
2890 Type = C.getPointerType(Type);
2891 if (isAllocatableDecl(VD))
2892 Type = C.getPointerType(Type);
2893 }
2894 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2895 if (VD->hasAttrs()) {
2896 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2897 E(VD->getAttrs().end());
2898 I != E; ++I)
2899 FD->addAttr(*I);
2900 }
2901 }
2902 RD->completeDefinition();
2903 return RD;
2904 }
2905 return nullptr;
2906}
2907
2908static RecordDecl *
2909createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2910 QualType KmpInt32Ty,
2911 QualType KmpRoutineEntryPointerQTy) {
2912 ASTContext &C = CGM.getContext();
2913 // Build struct kmp_task_t {
2914 // void * shareds;
2915 // kmp_routine_entry_t routine;
2916 // kmp_int32 part_id;
2917 // kmp_cmplrdata_t data1;
2918 // kmp_cmplrdata_t data2;
2919 // For taskloops additional fields:
2920 // kmp_uint64 lb;
2921 // kmp_uint64 ub;
2922 // kmp_int64 st;
2923 // kmp_int32 liter;
2924 // void * reductions;
2925 // };
2926 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2927 UD->startDefinition();
2928 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2929 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2930 UD->completeDefinition();
2931 QualType KmpCmplrdataTy = C.getRecordType(UD);
2932 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2933 RD->startDefinition();
2934 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2935 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2936 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2937 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2938 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2939 if (isOpenMPTaskLoopDirective(Kind)) {
2940 QualType KmpUInt64Ty =
2941 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2942 QualType KmpInt64Ty =
2943 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2944 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2945 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2946 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2947 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2948 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2949 }
2950 RD->completeDefinition();
2951 return RD;
2952}
2953
2954static RecordDecl *
2955createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2956 ArrayRef<PrivateDataTy> Privates) {
2957 ASTContext &C = CGM.getContext();
2958 // Build struct kmp_task_t_with_privates {
2959 // kmp_task_t task_data;
2960 // .kmp_privates_t. privates;
2961 // };
2962 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2963 RD->startDefinition();
2964 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2965 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2966 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2967 RD->completeDefinition();
2968 return RD;
2969}
2970
2971/// Emit a proxy function which accepts kmp_task_t as the second
2972/// argument.
2973/// \code
2974/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2975/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2976/// For taskloops:
2977/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2978/// tt->reductions, tt->shareds);
2979/// return 0;
2980/// }
2981/// \endcode
2982static llvm::Function *
2983emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2984 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2985 QualType KmpTaskTWithPrivatesPtrQTy,
2986 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2987 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2988 llvm::Value *TaskPrivatesMap) {
2989 ASTContext &C = CGM.getContext();
2990 FunctionArgList Args;
2991 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2992 ImplicitParamKind::Other);
2993 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2994 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2995 ImplicitParamKind::Other);
2996 Args.push_back(&GtidArg);
2997 Args.push_back(&TaskTypeArg);
2998 const auto &TaskEntryFnInfo =
2999 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3000 llvm::FunctionType *TaskEntryTy =
3001 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3002 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3003 auto *TaskEntry = llvm::Function::Create(
3004 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3005 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3006 TaskEntry->setDoesNotRecurse();
3007 CodeGenFunction CGF(CGM);
3008 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3009 Loc, Loc);
3010
3011 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3012 // tt,
3013 // For taskloops:
3014 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3015 // tt->task_data.shareds);
3016 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3017 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3018 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3019 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3020 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3021 const auto *KmpTaskTWithPrivatesQTyRD =
3022 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3023 LValue Base =
3024 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3025 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3026 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3027 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3028 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3029
3030 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3031 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3032 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3033 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3034 CGF.ConvertTypeForMem(SharedsPtrTy));
3035
3036 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3037 llvm::Value *PrivatesParam;
3038 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3039 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3040 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3041 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3042 } else {
3043 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3044 }
3045
3046 llvm::Value *CommonArgs[] = {
3047 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3048 CGF.Builder
3049 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3050 CGF.VoidPtrTy, CGF.Int8Ty)
3051 .emitRawPointer(CGF)};
3052 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3053 std::end(CommonArgs));
3054 if (isOpenMPTaskLoopDirective(Kind)) {
3055 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3056 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3057 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3058 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3059 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3060 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3061 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3062 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3063 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3064 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3065 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3066 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3067 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3068 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3069 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3070 CallArgs.push_back(LBParam);
3071 CallArgs.push_back(UBParam);
3072 CallArgs.push_back(StParam);
3073 CallArgs.push_back(LIParam);
3074 CallArgs.push_back(RParam);
3075 }
3076 CallArgs.push_back(SharedsParam);
3077
3078 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3079 CallArgs);
3080 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3081 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3082 CGF.FinishFunction();
3083 return TaskEntry;
3084}
3085
3086static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3087 SourceLocation Loc,
3088 QualType KmpInt32Ty,
3089 QualType KmpTaskTWithPrivatesPtrQTy,
3090 QualType KmpTaskTWithPrivatesQTy) {
3091 ASTContext &C = CGM.getContext();
3092 FunctionArgList Args;
3093 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3094 ImplicitParamKind::Other);
3095 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3096 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3097 ImplicitParamKind::Other);
3098 Args.push_back(&GtidArg);
3099 Args.push_back(&TaskTypeArg);
3100 const auto &DestructorFnInfo =
3101 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3102 llvm::FunctionType *DestructorFnTy =
3103 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3104 std::string Name =
3105 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3106 auto *DestructorFn =
3107 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3108 Name, &CGM.getModule());
3109 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3110 DestructorFnInfo);
3111 DestructorFn->setDoesNotRecurse();
3112 CodeGenFunction CGF(CGM);
3113 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3114 Args, Loc, Loc);
3115
3116 LValue Base = CGF.EmitLoadOfPointerLValue(
3117 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3118 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3119 const auto *KmpTaskTWithPrivatesQTyRD =
3120 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3121 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3122 Base = CGF.EmitLValueForField(Base, *FI);
3123 for (const auto *Field :
3124 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3125 if (QualType::DestructionKind DtorKind =
3126 Field->getType().isDestructedType()) {
3127 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3128 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3129 }
3130 }
3131 CGF.FinishFunction();
3132 return DestructorFn;
3133}
3134
3135/// Emit a privates mapping function for correct handling of private and
3136/// firstprivate variables.
3137/// \code
3138/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3139/// **noalias priv1,..., <tyn> **noalias privn) {
3140/// *priv1 = &.privates.priv1;
3141/// ...;
3142/// *privn = &.privates.privn;
3143/// }
3144/// \endcode
3145static llvm::Value *
3146emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3147 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3148 ArrayRef<PrivateDataTy> Privates) {
3149 ASTContext &C = CGM.getContext();
3150 FunctionArgList Args;
3151 ImplicitParamDecl TaskPrivatesArg(
3152 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3153 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3154 ImplicitParamKind::Other);
3155 Args.push_back(&TaskPrivatesArg);
3156 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3157 unsigned Counter = 1;
3158 for (const Expr *E : Data.PrivateVars) {
3159 Args.push_back(ImplicitParamDecl::Create(
3160 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3161 C.getPointerType(C.getPointerType(E->getType()))
3162 .withConst()
3163 .withRestrict(),
3164 ImplicitParamKind::Other));
3165 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3166 PrivateVarsPos[VD] = Counter;
3167 ++Counter;
3168 }
3169 for (const Expr *E : Data.FirstprivateVars) {
3170 Args.push_back(ImplicitParamDecl::Create(
3171 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3172 C.getPointerType(C.getPointerType(E->getType()))
3173 .withConst()
3174 .withRestrict(),
3175 ImplicitParamKind::Other));
3176 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3177 PrivateVarsPos[VD] = Counter;
3178 ++Counter;
3179 }
3180 for (const Expr *E : Data.LastprivateVars) {
3181 Args.push_back(ImplicitParamDecl::Create(
3182 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3183 C.getPointerType(C.getPointerType(E->getType()))
3184 .withConst()
3185 .withRestrict(),
3186 ImplicitParamKind::Other));
3187 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3188 PrivateVarsPos[VD] = Counter;
3189 ++Counter;
3190 }
3191 for (const VarDecl *VD : Data.PrivateLocals) {
3192 QualType Ty = VD->getType().getNonReferenceType();
3193 if (VD->getType()->isLValueReferenceType())
3194 Ty = C.getPointerType(Ty);
3195 if (isAllocatableDecl(VD))
3196 Ty = C.getPointerType(Ty);
3197 Args.push_back(ImplicitParamDecl::Create(
3198 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3199 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3200 ImplicitParamKind::Other));
3201 PrivateVarsPos[VD] = Counter;
3202 ++Counter;
3203 }
3204 const auto &TaskPrivatesMapFnInfo =
3205 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3206 llvm::FunctionType *TaskPrivatesMapTy =
3207 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3208 std::string Name =
3209 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3210 auto *TaskPrivatesMap = llvm::Function::Create(
3211 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3212 &CGM.getModule());
3213 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3214 TaskPrivatesMapFnInfo);
3215 if (CGM.getLangOpts().Optimize) {
3216 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3217 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3218 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3219 }
3220 CodeGenFunction CGF(CGM);
3221 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3222 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3223
3224 // *privi = &.privates.privi;
3225 LValue Base = CGF.EmitLoadOfPointerLValue(
3226 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3227 TaskPrivatesArg.getType()->castAs<PointerType>());
3228 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3229 Counter = 0;
3230 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3231 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3232 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3233 LValue RefLVal =
3234 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3235 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3236 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3237 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3238 ++Counter;
3239 }
3240 CGF.FinishFunction();
3241 return TaskPrivatesMap;
3242}
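// Editorial sketch (assumed example, not from the source): for
//   #pragma omp task private(a) firstprivate(b)
// the function built above conceptually expands to
//   void .omp_task_privates_map.(const .privates. *privs, int **a, int **b) {
//     *a = &privs->a;
//     *b = &privs->b;
//   }
// so the task entry can hand the outlined body the addresses of the
// privatized copies stored inside the kmp_task_t payload.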
3243
3244/// Emit initialization for private variables in task-based directives.
3245static void emitPrivatesInit(CodeGenFunction &CGF,
3246 const OMPExecutableDirective &D,
3247 Address KmpTaskSharedsPtr, LValue TDBase,
3248 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3249 QualType SharedsTy, QualType SharedsPtrTy,
3250 const OMPTaskDataTy &Data,
3251 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3252 ASTContext &C = CGF.getContext();
3253 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3254 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3255 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3256 ? OMPD_taskloop
3257 : OMPD_task;
3258 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3259 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3260 LValue SrcBase;
3261 bool IsTargetTask =
3262 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3263 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3264 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3265 // PointersArray, SizesArray, and MappersArray. The original variables for
3266 // these arrays are not captured and we get their addresses explicitly.
3267 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3268 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3269 SrcBase = CGF.MakeAddrLValue(
3270 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3271 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3272 CGF.ConvertTypeForMem(SharedsTy)),
3273 SharedsTy);
3274 }
3275 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3276 for (const PrivateDataTy &Pair : Privates) {
3277 // Do not initialize private locals.
3278 if (Pair.second.isLocalPrivate()) {
3279 ++FI;
3280 continue;
3281 }
3282 const VarDecl *VD = Pair.second.PrivateCopy;
3283 const Expr *Init = VD->getAnyInitializer();
3284 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3285 !CGF.isTrivialInitializer(Init)))) {
3286 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3287 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3288 const VarDecl *OriginalVD = Pair.second.Original;
3289 // Check if the variable is the target-based BasePointersArray,
3290 // PointersArray, SizesArray, or MappersArray.
3291 LValue SharedRefLValue;
3292 QualType Type = PrivateLValue.getType();
3293 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3294 if (IsTargetTask && !SharedField) {
3295 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3296 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3297 cast<CapturedDecl>(OriginalVD->getDeclContext())
3298 ->getNumParams() == 0 &&
3299 isa<TranslationUnitDecl>(
3300 cast<CapturedDecl>(OriginalVD->getDeclContext())
3301 ->getDeclContext()) &&
3302 "Expected artificial target data variable.");
3303 SharedRefLValue =
3304 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3305 } else if (ForDup) {
3306 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3307 SharedRefLValue = CGF.MakeAddrLValue(
3308 SharedRefLValue.getAddress().withAlignment(
3309 C.getDeclAlign(OriginalVD)),
3310 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3311 SharedRefLValue.getTBAAInfo());
3312 } else if (CGF.LambdaCaptureFields.count(
3313 Pair.second.Original->getCanonicalDecl()) > 0 ||
3314 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3315 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3316 } else {
3317 // Processing for implicitly captured variables.
3318 InlinedOpenMPRegionRAII Region(
3319 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3320 /*HasCancel=*/false, /*NoInheritance=*/true);
3321 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3322 }
3323 if (Type->isArrayType()) {
3324 // Initialize firstprivate array.
3325 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3326 // Perform simple memcpy.
3327 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3328 } else {
3329 // Initialize firstprivate array using element-by-element
3330 // initialization.
3331 CGF.EmitOMPAggregateAssign(
3332 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3333 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3334 Address SrcElement) {
3335 // Clean up any temporaries needed by the initialization.
3336 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3337 InitScope.addPrivate(Elem, SrcElement);
3338 (void)InitScope.Privatize();
3339 // Emit initialization for single element.
3340 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3341 CGF, &CapturesInfo);
3342 CGF.EmitAnyExprToMem(Init, DestElement,
3343 Init->getType().getQualifiers(),
3344 /*IsInitializer=*/false);
3345 });
3346 }
3347 } else {
3348 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3349 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3350 (void)InitScope.Privatize();
3351 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3352 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3353 /*capturedByInit=*/false);
3354 }
3355 } else {
3356 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3357 }
3358 }
3359 ++FI;
3360 }
3361}
3362
3363/// Check if a task duplication function is required for taskloops.
3364static bool checkInitIsRequired(CodeGenFunction &CGF,
3365 ArrayRef<PrivateDataTy> Privates) {
3366 bool InitRequired = false;
3367 for (const PrivateDataTy &Pair : Privates) {
3368 if (Pair.second.isLocalPrivate())
3369 continue;
3370 const VarDecl *VD = Pair.second.PrivateCopy;
3371 const Expr *Init = VD->getAnyInitializer();
3372 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3373 !CGF.isTrivialInitializer(Init));
3374 if (InitRequired)
3375 break;
3376 }
3377 return InitRequired;
3378}
3379
3380
3381/// Emit task_dup function (for initialization of
3382/// private/firstprivate/lastprivate vars and last_iter flag)
3383/// \code
3384/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3385/// lastpriv) {
3386/// // setup lastprivate flag
3387/// task_dst->last = lastpriv;
3388/// // could be constructor calls here...
3389/// }
3390/// \endcode
3391static llvm::Value *
3392emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3393 const OMPExecutableDirective &D,
3394 QualType KmpTaskTWithPrivatesPtrQTy,
3395 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3396 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3397 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3398 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3399 ASTContext &C = CGM.getContext();
3400 FunctionArgList Args;
3401 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3402 KmpTaskTWithPrivatesPtrQTy,
3403 ImplicitParamKind::Other);
3404 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3405 KmpTaskTWithPrivatesPtrQTy,
3406 ImplicitParamKind::Other);
3407 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3408 ImplicitParamKind::Other);
3409 Args.push_back(&DstArg);
3410 Args.push_back(&SrcArg);
3411 Args.push_back(&LastprivArg);
3412 const auto &TaskDupFnInfo =
3413 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3414 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3415 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3416 auto *TaskDup = llvm::Function::Create(
3417 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3418 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3419 TaskDup->setDoesNotRecurse();
3420 CodeGenFunction CGF(CGM);
3421 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3422 Loc);
3423
3424 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3425 CGF.GetAddrOfLocalVar(&DstArg),
3426 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3427 // task_dst->liter = lastpriv;
3428 if (WithLastIter) {
3429 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3430 LValue Base = CGF.EmitLValueForField(
3431 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3432 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3433 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3434 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3435 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3436 }
3437
3438 // Emit initial values for private copies (if any).
3439 assert(!Privates.empty());
3440 Address KmpTaskSharedsPtr = Address::invalid();
3441 if (!Data.FirstprivateVars.empty()) {
3442 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3443 CGF.GetAddrOfLocalVar(&SrcArg),
3444 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3445 LValue Base = CGF.EmitLValueForField(
3446 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3447 KmpTaskSharedsPtr = Address(
3448 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3449 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3450 KmpTaskTShareds)),
3451 Loc),
3452 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3453 }
3454 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3455 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3456 CGF.FinishFunction();
3457 return TaskDup;
3458}
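// Editorial sketch (assumed example): a directive such as
//   #pragma omp taskloop lastprivate(x)
// makes the runtime clone the task descriptor for each generated task; the
// runtime then invokes the .omp_task_dup. thunk emitted above on every clone,
// so the lastprivate flag and any non-trivially-initialized private copies
// are set up in the destination descriptor.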
3459
3460/// Checks if destructor function is required to be generated.
3461/// \return true if cleanups are required, false otherwise.
3462static bool
3463checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3464 ArrayRef<PrivateDataTy> Privates) {
3465 for (const PrivateDataTy &P : Privates) {
3466 if (P.second.isLocalPrivate())
3467 continue;
3468 QualType Ty = P.second.Original->getType().getNonReferenceType();
3469 if (Ty.isDestructedType())
3470 return true;
3471 }
3472 return false;
3473}
3474
3475namespace {
3476/// Loop generator for OpenMP iterator expression.
3477class OMPIteratorGeneratorScope final
3478 : public CodeGenFunction::OMPPrivateScope {
3479 CodeGenFunction &CGF;
3480 const OMPIteratorExpr *E = nullptr;
3481 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3482 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3483 OMPIteratorGeneratorScope() = delete;
3484 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3485
3486public:
3487 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3488 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3489 if (!E)
3490 return;
3491 SmallVector<llvm::Value *, 4> Uppers;
3492 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3493 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3494 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3495 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3496 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3497 addPrivate(
3498 HelperData.CounterVD,
3499 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3500 }
3501 Privatize();
3502
3503 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3504 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3505 LValue CLVal =
3506 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3507 HelperData.CounterVD->getType());
3508 // Counter = 0;
3509 CGF.EmitStoreOfScalar(
3510 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3511 CLVal);
3512 CodeGenFunction::JumpDest &ContDest =
3513 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3514 CodeGenFunction::JumpDest &ExitDest =
3515 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3516 // N = <number-of-iterations>;
3517 llvm::Value *N = Uppers[I];
3518 // cont:
3519 // if (Counter < N) goto body; else goto exit;
3520 CGF.EmitBlock(ContDest.getBlock());
3521 auto *CVal =
3522 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3523 llvm::Value *Cmp =
3524 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3525 ? CGF.Builder.CreateICmpSLT(CVal, N)
3526 : CGF.Builder.CreateICmpULT(CVal, N);
3527 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3528 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3529 // body:
3530 CGF.EmitBlock(BodyBB);
3531 // Iteri = Begini + Counter * Stepi;
3532 CGF.EmitIgnoredExpr(HelperData.Update);
3533 }
3534 }
3535 ~OMPIteratorGeneratorScope() {
3536 if (!E)
3537 return;
3538 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3539 // Counter = Counter + 1;
3540 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3541 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3542 // goto cont;
3543 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3544 // exit:
3545 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3546 }
3547 }
3548};
3549} // namespace
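// Editorial sketch (assumed example): OMPIteratorGeneratorScope is what turns
//   #pragma omp task depend(iterator(i = 0:n), in: a[i])
// into a counter loop around the element-filling code, conceptually
//   for (size_t counter = 0; counter < n; ++counter) {
//     i = begin + counter * step;   // HelperData.Update
//     ... fill one descriptor element ...
//   }
// with the constructor emitting the loop header and the destructor emitting
// the counter update, back-branch, and exit block.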
3550
3551static std::pair<llvm::Value *, llvm::Value *>
3552getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3553 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3554 llvm::Value *Addr;
3555 if (OASE) {
3556 const Expr *Base = OASE->getBase();
3557 Addr = CGF.EmitScalarExpr(Base);
3558 } else {
3559 Addr = CGF.EmitLValue(E).getPointer(CGF);
3560 }
3561 llvm::Value *SizeVal;
3562 QualType Ty = E->getType();
3563 if (OASE) {
3564 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3565 for (const Expr *SE : OASE->getDimensions()) {
3566 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3567 Sz = CGF.EmitScalarConversion(
3568 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3569 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3570 }
3571 } else if (const auto *ASE =
3572 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3573 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3574 Address UpAddrAddress = UpAddrLVal.getAddress();
3575 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3576 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3577 /*Idx0=*/1);
3578 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3579 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3580 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3581 } else {
3582 SizeVal = CGF.getTypeSize(Ty);
3583 }
3584 return std::make_pair(Addr, SizeVal);
3585}
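// Editorial sketch (assumed examples) of the three cases handled above:
//   ([n][m])p    -> base pointer p, size n * m * sizeof(*p)    (shaping expr)
//   a[lb:len]    -> &a[lb], size = one-past-end minus &a[lb]   (array section)
//   x            -> &x, size sizeof(x)                         (plain lvalue)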
3586
3587/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
3588static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3589 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3590 if (KmpTaskAffinityInfoTy.isNull()) {
3591 RecordDecl *KmpAffinityInfoRD =
3592 C.buildImplicitRecord("kmp_task_affinity_info_t");
3593 KmpAffinityInfoRD->startDefinition();
3594 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3595 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3596 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3597 KmpAffinityInfoRD->completeDefinition();
3598 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3599 }
3600}
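// Editorial sketch: the implicit record built above mirrors the runtime's
// affinity descriptor (field names are illustrative; see kmp.h):
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t   len;
//     uint32_t flags;
//   };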
3601
3602CGOpenMPRuntime::TaskResultTy
3603CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3604 const OMPExecutableDirective &D,
3605 llvm::Function *TaskFunction, QualType SharedsTy,
3606 Address Shareds, const OMPTaskDataTy &Data) {
3607 ASTContext &C = CGM.getContext();
3608 llvm::SmallVector<PrivateDataTy, 4> Privates;
3609 // Aggregate privates and sort them by alignment.
3610 const auto *I = Data.PrivateCopies.begin();
3611 for (const Expr *E : Data.PrivateVars) {
3612 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3613 Privates.emplace_back(
3614 C.getDeclAlign(VD),
3615 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3616 /*PrivateElemInit=*/nullptr));
3617 ++I;
3618 }
3619 I = Data.FirstprivateCopies.begin();
3620 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3621 for (const Expr *E : Data.FirstprivateVars) {
3622 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3623 Privates.emplace_back(
3624 C.getDeclAlign(VD),
3625 PrivateHelpersTy(
3626 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3628 ++I;
3629 ++IElemInitRef;
3630 }
3631 I = Data.LastprivateCopies.begin();
3632 for (const Expr *E : Data.LastprivateVars) {
3633 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3634 Privates.emplace_back(
3635 C.getDeclAlign(VD),
3636 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3637 /*PrivateElemInit=*/nullptr));
3638 ++I;
3639 }
3640 for (const VarDecl *VD : Data.PrivateLocals) {
3641 if (isAllocatableDecl(VD))
3642 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3643 else
3644 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3645 }
3646 llvm::stable_sort(Privates,
3647 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3648 return L.first > R.first;
3649 });
3650 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3651 // Build type kmp_routine_entry_t (if not built yet).
3652 emitKmpRoutineEntryT(KmpInt32Ty);
3653 // Build type kmp_task_t (if not built yet).
3654 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3655 if (SavedKmpTaskloopTQTy.isNull()) {
3656 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3657 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3658 }
3659 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3660 } else {
3661 assert((D.getDirectiveKind() == OMPD_task ||
3662 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3663 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3664 "Expected taskloop, task or target directive");
3665 if (SavedKmpTaskTQTy.isNull()) {
3666 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3667 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3668 }
3669 KmpTaskTQTy = SavedKmpTaskTQTy;
3670 }
3671 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3672 // Build particular struct kmp_task_t for the given task.
3673 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3674 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3675 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3676 QualType KmpTaskTWithPrivatesPtrQTy =
3677 C.getPointerType(KmpTaskTWithPrivatesQTy);
3678 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3679 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3680 KmpTaskTWithPrivatesTy->getPointerTo();
3681 llvm::Value *KmpTaskTWithPrivatesTySize =
3682 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3683 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3684
3685 // Emit initial values for private copies (if any).
3686 llvm::Value *TaskPrivatesMap = nullptr;
3687 llvm::Type *TaskPrivatesMapTy =
3688 std::next(TaskFunction->arg_begin(), 3)->getType();
3689 if (!Privates.empty()) {
3690 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3691 TaskPrivatesMap =
3692 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3693 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3694 TaskPrivatesMap, TaskPrivatesMapTy);
3695 } else {
3696 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3697 cast<llvm::PointerType>(TaskPrivatesMapTy));
3698 }
3699 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3700 // kmp_task_t *tt);
3701 llvm::Function *TaskEntry = emitProxyTaskFunction(
3702 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3703 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3704 TaskPrivatesMap);
3705
3706 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3707 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3708 // kmp_routine_entry_t *task_entry);
3709 // Task flags. Format is taken from
3710 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3711 // description of kmp_tasking_flags struct.
3712 enum {
3713 TiedFlag = 0x1,
3714 FinalFlag = 0x2,
3715 DestructorsFlag = 0x8,
3716 PriorityFlag = 0x20,
3717 DetachableFlag = 0x40,
3718 };
3719 unsigned Flags = Data.Tied ? TiedFlag : 0;
3720 bool NeedsCleanup = false;
3721 if (!Privates.empty()) {
3722 NeedsCleanup =
3723 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3724 if (NeedsCleanup)
3725 Flags = Flags | DestructorsFlag;
3726 }
3727 if (Data.Priority.getInt())
3728 Flags = Flags | PriorityFlag;
3729 if (D.hasClausesOfKind<OMPDetachClause>())
3730 Flags = Flags | DetachableFlag;
3731 llvm::Value *TaskFlags =
3732 Data.Final.getPointer()
3733 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3734 CGF.Builder.getInt32(FinalFlag),
3735 CGF.Builder.getInt32(/*C=*/0))
3736 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3737 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3738 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3739 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3740 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3741 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3742 TaskEntry, KmpRoutineEntryPtrTy)};
3743 llvm::Value *NewTask;
3744 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3745 // Check if we have any device clause associated with the directive.
3746 const Expr *Device = nullptr;
3747 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3748 Device = C->getDevice();
3749 // Emit the device ID if any, otherwise use the default value.
3750 llvm::Value *DeviceID;
3751 if (Device)
3752 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3753 CGF.Int64Ty, /*isSigned=*/true);
3754 else
3755 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3756 AllocArgs.push_back(DeviceID);
3757 NewTask = CGF.EmitRuntimeCall(
3758 OMPBuilder.getOrCreateRuntimeFunction(
3759 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3760 AllocArgs);
3761 } else {
3762 NewTask =
3763 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3764 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3765 AllocArgs);
3766 }
3767 // Emit detach clause initialization.
3768 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3769 // task_descriptor);
3770 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3771 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3772 LValue EvtLVal = CGF.EmitLValue(Evt);
3773
3774 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3775 // int gtid, kmp_task_t *task);
3776 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3777 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3778 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3779 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3780 OMPBuilder.getOrCreateRuntimeFunction(
3781 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3782 {Loc, Tid, NewTask});
3783 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3784 Evt->getExprLoc());
3785 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3786 }
3787 // Process affinity clauses.
3788 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3789 // Process list of affinity data.
3790 ASTContext &C = CGM.getContext();
3791 Address AffinitiesArray = Address::invalid();
3792 // Calculate number of elements to form the array of affinity data.
3793 llvm::Value *NumOfElements = nullptr;
3794 unsigned NumAffinities = 0;
3795 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3796 if (const Expr *Modifier = C->getModifier()) {
3797 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3798 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3799 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3800 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3801 NumOfElements =
3802 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3803 }
3804 } else {
3805 NumAffinities += C->varlist_size();
3806 }
3807 }
3808 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3809 // Field ids in the kmp_task_affinity_info record.
3810 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3811
3812 QualType KmpTaskAffinityInfoArrayTy;
3813 if (NumOfElements) {
3814 NumOfElements = CGF.Builder.CreateNUWAdd(
3815 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3816 auto *OVE = new (C) OpaqueValueExpr(
3817 Loc,
3818 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3819 VK_PRValue);
3820 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3821 RValue::get(NumOfElements));
3822 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3823 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3824 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3825 // Properly emit variable-sized array.
3826 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3827 ImplicitParamKind::Other);
3828 CGF.EmitVarDecl(*PD);
3829 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3830 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3831 /*isSigned=*/false);
3832 } else {
3833 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3834 KmpTaskAffinityInfoTy,
3835 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3836 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3837 AffinitiesArray =
3838 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3839 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3840 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3841 /*isSigned=*/false);
3842 }
3843
3844 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3845 // Fill array by elements without iterators.
3846 unsigned Pos = 0;
3847 bool HasIterator = false;
3848 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3849 if (C->getModifier()) {
3850 HasIterator = true;
3851 continue;
3852 }
3853 for (const Expr *E : C->varlists()) {
3854 llvm::Value *Addr;
3855 llvm::Value *Size;
3856 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3857 LValue Base =
3858 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3859 KmpTaskAffinityInfoTy);
3860 // affs[i].base_addr = &<Affinities[i].second>;
3861 LValue BaseAddrLVal = CGF.EmitLValueForField(
3862 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3863 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3864 BaseAddrLVal);
3865 // affs[i].len = sizeof(<Affinities[i].second>);
3866 LValue LenLVal = CGF.EmitLValueForField(
3867 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3868 CGF.EmitStoreOfScalar(Size, LenLVal);
3869 ++Pos;
3870 }
3871 }
3872 LValue PosLVal;
3873 if (HasIterator) {
3874 PosLVal = CGF.MakeAddrLValue(
3875 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3876 C.getSizeType());
3877 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3878 }
3879 // Process elements with iterators.
3880 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3881 const Expr *Modifier = C->getModifier();
3882 if (!Modifier)
3883 continue;
3884 OMPIteratorGeneratorScope IteratorScope(
3885 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3886 for (const Expr *E : C->varlists()) {
3887 llvm::Value *Addr;
3888 llvm::Value *Size;
3889 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3890 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3891 LValue Base =
3892 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3893 KmpTaskAffinityInfoTy);
3894 // affs[i].base_addr = &<Affinities[i].second>;
3895 LValue BaseAddrLVal = CGF.EmitLValueForField(
3896 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3897 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3898 BaseAddrLVal);
3899 // affs[i].len = sizeof(<Affinities[i].second>);
3900 LValue LenLVal = CGF.EmitLValueForField(
3901 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3902 CGF.EmitStoreOfScalar(Size, LenLVal);
3903 Idx = CGF.Builder.CreateNUWAdd(
3904 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3905 CGF.EmitStoreOfScalar(Idx, PosLVal);
3906 }
3907 }
3908 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3909 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3910 // naffins, kmp_task_affinity_info_t *affin_list);
3911 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3912 llvm::Value *GTid = getThreadID(CGF, Loc);
3913 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3914 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3915 // FIXME: Emit the function and ignore its result for now unless the
3916 // runtime function is properly implemented.
3917 (void)CGF.EmitRuntimeCall(
3918 OMPBuilder.getOrCreateRuntimeFunction(
3919 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3920 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3921 }
3922 llvm::Value *NewTaskNewTaskTTy =
3923 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3924 NewTask, KmpTaskTWithPrivatesPtrTy);
3925 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3926 KmpTaskTWithPrivatesQTy);
3927 LValue TDBase =
3928 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3929 // Fill the data in the resulting kmp_task_t record.
3930 // Copy shareds if there are any.
3931 Address KmpTaskSharedsPtr = Address::invalid();
3932 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3933 KmpTaskSharedsPtr = Address(
3934 CGF.EmitLoadOfScalar(
3935 CGF.EmitLValueForField(
3936 TDBase,
3937 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3938 Loc),
3939 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3940 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3941 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3942 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3943 }
3944 // Emit initial values for private copies (if any).
3945 TaskResultTy Result;
3946 if (!Privates.empty()) {
3947 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3948 SharedsTy, SharedsPtrTy, Data, Privates,
3949 /*ForDup=*/false);
3950 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3951 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3952 Result.TaskDupFn = emitTaskDupFunction(
3953 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3954 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3955 /*WithLastIter=*/!Data.LastprivateVars.empty());
3956 }
3957 }
3958 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3959 enum { Priority = 0, Destructors = 1 };
3960 // Provide pointer to function with destructors for privates.
3961 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3962 const RecordDecl *KmpCmplrdataUD =
3963 (*FI)->getType()->getAsUnionType()->getDecl();
3964 if (NeedsCleanup) {
3965 llvm::Value *DestructorFn = emitDestructorsFunction(
3966 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3967 KmpTaskTWithPrivatesQTy);
3968 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3969 LValue DestructorsLV = CGF.EmitLValueForField(
3970 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3971 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3972 DestructorFn, KmpRoutineEntryPtrTy),
3973 DestructorsLV);
3974 }
3975 // Set priority.
3976 if (Data.Priority.getInt()) {
3977 LValue Data2LV = CGF.EmitLValueForField(
3978 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3979 LValue PriorityLV = CGF.EmitLValueForField(
3980 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3981 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3982 }
3983 Result.NewTask = NewTask;
3984 Result.TaskEntry = TaskEntry;
3985 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3986 Result.TDBase = TDBase;
3987 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3988 return Result;
3989}
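// Editorial sketch (assumed example): for
//   #pragma omp task firstprivate(b)
// emitTaskInit boils down to a runtime call such as
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, /*flags=*/TiedFlag,
//                                         sizeof(kmp_task_t_with_privates),
//                                         sizeof(shareds), .omp_task_entry.);
// followed by copying the captured shareds into t->shareds and running the
// firstprivate initializers into the privates part of the descriptor.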
3990
3991/// Translates internal dependency kind into the runtime kind.
3992static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3993 RTLDependenceKindTy DepKind;
3994 switch (K) {
3995 case OMPC_DEPEND_in:
3996 DepKind = RTLDependenceKindTy::DepIn;
3997 break;
3998 // Out and InOut dependencies must use the same code.
3999 case OMPC_DEPEND_out:
4000 case OMPC_DEPEND_inout:
4001 DepKind = RTLDependenceKindTy::DepInOut;
4002 break;
4003 case OMPC_DEPEND_mutexinoutset:
4004 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4005 break;
4006 case OMPC_DEPEND_inoutset:
4007 DepKind = RTLDependenceKindTy::DepInOutSet;
4008 break;
4009 case OMPC_DEPEND_outallmemory:
4010 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4011 break;
4012 case OMPC_DEPEND_source:
4013 case OMPC_DEPEND_sink:
4014 case OMPC_DEPEND_depobj:
4015 case OMPC_DEPEND_inoutallmemory:
4016 case OMPC_DEPEND_unknown:
4017 llvm_unreachable("Unknown task dependence type");
4018 }
4019 return DepKind;
4020}
4021
4022/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4023static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4024 QualType &FlagsTy) {
4025 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4026 if (KmpDependInfoTy.isNull()) {
4027 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4028 KmpDependInfoRD->startDefinition();
4029 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4030 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4031 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4032 KmpDependInfoRD->completeDefinition();
4033 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4034 }
4035}
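// Editorial sketch: the kmp_depend_info record built above corresponds to the
// runtime-side descriptor (flag layout abridged; see kmp.h):
//   struct kmp_depend_info {
//     intptr_t base_addr;  // start of the dependence range
//     size_t   len;        // its length in bytes
//     uint8_t  flags;      // in/out/mtx/set bits per RTLDependenceKindTy
//   };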
4036
4037std::pair<llvm::Value *, LValue>
4038CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4039 SourceLocation Loc) {
4040 ASTContext &C = CGM.getContext();
4041 QualType FlagsTy;
4042 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4043 RecordDecl *KmpDependInfoRD =
4044 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4045 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4046 LValue Base = CGF.EmitLoadOfPointerLValue(
4047 DepobjLVal.getAddress().withElementType(
4048 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4049 KmpDependInfoPtrTy->castAs<PointerType>());
4050 Address DepObjAddr = CGF.Builder.CreateGEP(
4051 CGF, Base.getAddress(),
4052 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4053 LValue NumDepsBase = CGF.MakeAddrLValue(
4054 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4055 // NumDeps = deps[i].base_addr;
4056 LValue BaseAddrLVal = CGF.EmitLValueForField(
4057 NumDepsBase,
4058 *std::next(KmpDependInfoRD->field_begin(),
4059 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4060 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4061 return std::make_pair(NumDeps, Base);
4062}
4063
4064static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4065 llvm::PointerUnion<unsigned *, LValue *> Pos,
4066 const OMPTaskDataTy::DependData &Data,
4067 Address DependenciesArray) {
4068 CodeGenModule &CGM = CGF.CGM;
4069 ASTContext &C = CGM.getContext();
4070 QualType FlagsTy;
4071 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4072 RecordDecl *KmpDependInfoRD =
4073 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4074 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4075
4076 OMPIteratorGeneratorScope IteratorScope(
4077 CGF, cast_or_null<OMPIteratorExpr>(
4078 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4079 : nullptr));
4080 for (const Expr *E : Data.DepExprs) {
4081 llvm::Value *Addr;
4082 llvm::Value *Size;
4083
4084 // The expression will be a nullptr in the 'omp_all_memory' case.
4085 if (E) {
4086 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4087 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4088 } else {
4089 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4090 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4091 }
4092 LValue Base;
4093 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4094 Base = CGF.MakeAddrLValue(
4095 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4096 } else {
4097 assert(E && "Expected a non-null expression");
4098 LValue &PosLVal = *Pos.get<LValue *>();
4099 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4100 Base = CGF.MakeAddrLValue(
4101 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4102 }
4103 // deps[i].base_addr = &<Dependencies[i].second>;
4104 LValue BaseAddrLVal = CGF.EmitLValueForField(
4105 Base,
4106 *std::next(KmpDependInfoRD->field_begin(),
4107 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4108 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4109 // deps[i].len = sizeof(<Dependencies[i].second>);
4110 LValue LenLVal = CGF.EmitLValueForField(
4111 Base, *std::next(KmpDependInfoRD->field_begin(),
4112 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4113 CGF.EmitStoreOfScalar(Size, LenLVal);
4114 // deps[i].flags = <Dependencies[i].first>;
4115 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4116 LValue FlagsLVal = CGF.EmitLValueForField(
4117 Base,
4118 *std::next(KmpDependInfoRD->field_begin(),
4119 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4120 CGF.EmitStoreOfScalar(
4121 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4122 FlagsLVal);
4123 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4124 ++(*P);
4125 } else {
4126 LValue &PosLVal = *Pos.get<LValue *>();
4127 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4128 Idx = CGF.Builder.CreateNUWAdd(Idx,
4129 llvm::ConstantInt::get(Idx->getType(), 1));
4130 CGF.EmitStoreOfScalar(Idx, PosLVal);
4131 }
4132 }
4133}
4134
4135static SmallVector<llvm::Value *, 4> emitDepobjElementsSizes(
4136 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4137 const OMPTaskDataTy::DependData &Data) {
4138 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4139 "Expected depobj dependency kind.");
4140 SmallVector<llvm::Value *, 4> Sizes;
4141 SmallVector<LValue, 4> SizeLVals;
4142 ASTContext &C = CGF.getContext();
4143 {
4144 OMPIteratorGeneratorScope IteratorScope(
4145 CGF, cast_or_null<OMPIteratorExpr>(
4146 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4147 : nullptr));
4148 for (const Expr *E : Data.DepExprs) {
4149 llvm::Value *NumDeps;
4150 LValue Base;
4151 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4152 std::tie(NumDeps, Base) =
4153 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4154 LValue NumLVal = CGF.MakeAddrLValue(
4155 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4156 C.getUIntPtrType());
4157 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4158 NumLVal.getAddress());
4159 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4160 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4161 CGF.EmitStoreOfScalar(Add, NumLVal);
4162 SizeLVals.push_back(NumLVal);
4163 }
4164 }
4165 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4166 llvm::Value *Size =
4167 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4168 Sizes.push_back(Size);
4169 }
4170 return Sizes;
4171}
4172
4173static void emitDepobjElements(CodeGenFunction &CGF,
4174 QualType &KmpDependInfoTy,
4175 LValue PosLVal,
4176 const OMPTaskDataTy::DependData &Data,
4177 Address DependenciesArray) {
4178 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4179 "Expected depobj dependency kind.");
4180 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4181 {
4182 OMPIteratorGeneratorScope IteratorScope(
4183 CGF, cast_or_null<OMPIteratorExpr>(
4184 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4185 : nullptr));
4186 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4187 const Expr *E = Data.DepExprs[I];
4188 llvm::Value *NumDeps;
4189 LValue Base;
4190 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4191 std::tie(NumDeps, Base) =
4192 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4193
4194 // memcopy dependency data.
4195 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4196 ElSize,
4197 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4198 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4199 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4200 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4201
4202 // Increase pos.
4203 // pos += size;
4204 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4205 CGF.EmitStoreOfScalar(Add, PosLVal);
4206 }
4207 }
4208}
4209
4210std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4211 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4212 SourceLocation Loc) {
4213 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4214 return D.DepExprs.empty();
4215 }))
4216 return std::make_pair(nullptr, Address::invalid());
4217 // Process list of dependencies.
4218 ASTContext &C = CGM.getContext();
4219 Address DependenciesArray = Address::invalid();
4220 llvm::Value *NumOfElements = nullptr;
4221 unsigned NumDependencies = std::accumulate(
4222 Dependencies.begin(), Dependencies.end(), 0,
4223 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4224 return D.DepKind == OMPC_DEPEND_depobj
4225 ? V
4226 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4227 });
4228 QualType FlagsTy;
4229 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4230 bool HasDepobjDeps = false;
4231 bool HasRegularWithIterators = false;
4232 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4233 llvm::Value *NumOfRegularWithIterators =
4234 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4235 // Calculate number of depobj dependencies and regular deps with the
4236 // iterators.
4237 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4238 if (D.DepKind == OMPC_DEPEND_depobj) {
4239 SmallVector<llvm::Value *, 4> Sizes =
4240 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4241 for (llvm::Value *Size : Sizes) {
4242 NumOfDepobjElements =
4243 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4244 }
4245 HasDepobjDeps = true;
4246 continue;
4247 }
4248 // Include number of iterations, if any.
4249
4250 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4251 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4252 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4253 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4254 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4255 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4256 NumOfRegularWithIterators =
4257 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4258 }
4259 HasRegularWithIterators = true;
4260 continue;
4261 }
4262 }
4263
4264 QualType KmpDependInfoArrayTy;
4265 if (HasDepobjDeps || HasRegularWithIterators) {
4266 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4267 /*isSigned=*/false);
4268 if (HasDepobjDeps) {
4269 NumOfElements =
4270 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4271 }
4272 if (HasRegularWithIterators) {
4273 NumOfElements =
4274 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4275 }
4276 auto *OVE = new (C) OpaqueValueExpr(
4277 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4278 VK_PRValue);
4279 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4280 RValue::get(NumOfElements));
4281 KmpDependInfoArrayTy =
4282 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4283 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4284 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4285 // Properly emit variable-sized array.
4286 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4287 ImplicitParamKind::Other);
4288 CGF.EmitVarDecl(*PD);
4289 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4290 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4291 /*isSigned=*/false);
4292 } else {
4293 KmpDependInfoArrayTy = C.getConstantArrayType(
4294 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4295 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4296 DependenciesArray =
4297 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4298 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4299 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4300 /*isSigned=*/false);
4301 }
4302 unsigned Pos = 0;
4303 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4304 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4305 Dependencies[I].IteratorExpr)
4306 continue;
4307 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4308 DependenciesArray);
4309 }
4310 // Copy regular dependencies with iterators.
4311 LValue PosLVal = CGF.MakeAddrLValue(
4312 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4313 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4314 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4315 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4316 !Dependencies[I].IteratorExpr)
4317 continue;
4318 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4319 DependenciesArray);
4320 }
4321 // Copy final depobj arrays without iterators.
4322 if (HasDepobjDeps) {
4323 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4324 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4325 continue;
4326 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4327 DependenciesArray);
4328 }
4329 }
4330 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4331 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4332 return std::make_pair(NumOfElements, DependenciesArray);
4333}
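// Editorial sketch (assumed example): for
//   #pragma omp task depend(in: a) depend(out: b)
// the code above materializes a two-element kmp_depend_info array,
//   deps[0] = { (intptr_t)&a, sizeof(a), DepIn };
//   deps[1] = { (intptr_t)&b, sizeof(b), DepInOut };   // out and inout share a kind
// and returns its element count and address for __kmpc_omp_task_with_deps.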
4334
4335Address CGOpenMPRuntime::emitDepobjDependClause(
4336 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4337 SourceLocation Loc) {
4338 if (Dependencies.DepExprs.empty())
4339 return Address::invalid();
4340 // Process list of dependencies.
4341 ASTContext &C = CGM.getContext();
4342 Address DependenciesArray = Address::invalid();
4343 unsigned NumDependencies = Dependencies.DepExprs.size();
4344 QualType FlagsTy;
4345 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4346 RecordDecl *KmpDependInfoRD =
4347 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4348
4349 llvm::Value *Size;
4350 // Define type kmp_depend_info[<Dependencies.size()>];
4351 // For depobj reserve one extra element to store the number of elements.
4352 // It is required to handle depobj(x) update(in) construct.
4353 // kmp_depend_info[<Dependencies.size()>] deps;
4354 llvm::Value *NumDepsVal;
4355 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4356 if (const auto *IE =
4357 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4358 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4359 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4360 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4361 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4362 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4363 }
4364 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4365 NumDepsVal);
4366 CharUnits SizeInBytes =
4367 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4368 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4369 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4370 NumDepsVal =
4371 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4372 } else {
4373 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4374 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4375 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4376 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4377 Size = CGM.getSize(Sz.alignTo(Align));
4378 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4379 }
4380 // Need to allocate on the dynamic memory.
4381 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4382 // Use default allocator.
4383 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4384 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4385
4386 llvm::Value *Addr =
4387 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4388 CGM.getModule(), OMPRTL___kmpc_alloc),
4389 Args, ".dep.arr.addr");
4390 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4391 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4392 Addr, KmpDependInfoLlvmTy->getPointerTo());
4393 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4394 // Write number of elements in the first element of array for depobj.
4395 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4396 // deps[i].base_addr = NumDependencies;
4397 LValue BaseAddrLVal = CGF.EmitLValueForField(
4398 Base,
4399 *std::next(KmpDependInfoRD->field_begin(),
4400 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4401 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4402 llvm::PointerUnion<unsigned *, LValue *> Pos;
4403 unsigned Idx = 1;
4404 LValue PosLVal;
4405 if (Dependencies.IteratorExpr) {
4406 PosLVal = CGF.MakeAddrLValue(
4407 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4408 C.getSizeType());
4409 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4410 /*IsInit=*/true);
4411 Pos = &PosLVal;
4412 } else {
4413 Pos = &Idx;
4414 }
4415 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4416 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4417 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4418 CGF.Int8Ty);
4419 return DependenciesArray;
4420}
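// Editorial sketch (assumed example): for
//   omp_depend_t d;
//   #pragma omp depobj(d) depend(in: x)
// the code above heap-allocates (via __kmpc_alloc) an array of
// NumDependencies + 1 kmp_depend_info entries, stores the element count in
// slot 0, fills the dependencies from slot 1 on, and makes d point past the
// count so later depobj update/destroy clauses can recover both.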
4421
4422void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4423 SourceLocation Loc) {
4424 ASTContext &C = CGM.getContext();
4425 QualType FlagsTy;
4426 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4427 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4428 C.VoidPtrTy.castAs<PointerType>());
4429 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4430 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4431 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4432 CGF.ConvertTypeForMem(KmpDependInfoTy));
4433 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4434 Addr.getElementType(), Addr.emitRawPointer(CGF),
4435 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4436 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4437 CGF.VoidPtrTy);
4438 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4439 // Use default allocator.
4440 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4441 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4442
4443 // _kmpc_free(gtid, addr, nullptr);
4444 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4445 CGM.getModule(), OMPRTL___kmpc_free),
4446 Args);
4447}
4448
4449void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4450 OpenMPDependClauseKind NewDepKind,
4451 SourceLocation Loc) {
4452 ASTContext &C = CGM.getContext();
4453 QualType FlagsTy;
4454 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4455 RecordDecl *KmpDependInfoRD =
4456 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4457 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4458 llvm::Value *NumDeps;
4459 LValue Base;
4460 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4461
4462 Address Begin = Base.getAddress();
4463 // Cast from pointer to array type to pointer to single element.
4464 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4465 Begin.emitRawPointer(CGF), NumDeps);
4466 // The basic structure here is a while-do loop.
4467 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4468 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4469 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4470 CGF.EmitBlock(BodyBB);
4471 llvm::PHINode *ElementPHI =
4472 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4473 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4474 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4475 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4476 Base.getTBAAInfo());
4477 // deps[i].flags = NewDepKind;
4478 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4479 LValue FlagsLVal = CGF.EmitLValueForField(
4480 Base, *std::next(KmpDependInfoRD->field_begin(),
4481 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4482 CGF.EmitStoreOfScalar(
4483 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4484 FlagsLVal);
4485
4486 // Shift the address forward by one element.
4487 llvm::Value *ElementNext =
4488 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4489 .emitRawPointer(CGF);
4490 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4491 llvm::Value *IsEmpty =
4492 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4493 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4494 // Done.
4495 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4496}
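// Editorial sketch (assumed example): for
//   #pragma omp depobj(d) update(inout)
// the loop above walks every kmp_depend_info element recorded behind d and
// rewrites its flags field to the translated DepInOut kind, leaving
// base_addr and len untouched.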
4497
4498void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4499 const OMPExecutableDirective &D,
4500 llvm::Function *TaskFunction,
4501 QualType SharedsTy, Address Shareds,
4502 const Expr *IfCond,
4503 const OMPTaskDataTy &Data) {
4504 if (!CGF.HaveInsertPoint())
4505 return;
4506
4507 TaskResultTy Result =
4508 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4509 llvm::Value *NewTask = Result.NewTask;
4510 llvm::Function *TaskEntry = Result.TaskEntry;
4511 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4512 LValue TDBase = Result.TDBase;
4513 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4514 // Process list of dependences.
4515 Address DependenciesArray = Address::invalid();
4516 llvm::Value *NumOfElements;
4517 std::tie(NumOfElements, DependenciesArray) =
4518 emitDependClause(CGF, Data.Dependences, Loc);
4519
4520 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4521 // libcall.
4522 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4523 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4524 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4525 // list is not empty
4526 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4527 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4528 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4529 llvm::Value *DepTaskArgs[7];
4530 if (!Data.Dependences.empty()) {
4531 DepTaskArgs[0] = UpLoc;
4532 DepTaskArgs[1] = ThreadID;
4533 DepTaskArgs[2] = NewTask;
4534 DepTaskArgs[3] = NumOfElements;
4535 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4536 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4537 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4538 }
4539 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4540 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4541 if (!Data.Tied) {
4542 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4543 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4544 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4545 }
4546 if (!Data.Dependences.empty()) {
4547 CGF.EmitRuntimeCall(
4548 OMPBuilder.getOrCreateRuntimeFunction(
4549 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4550 DepTaskArgs);
4551 } else {
4552 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4553 CGM.getModule(), OMPRTL___kmpc_omp_task),
4554 TaskArgs);
4555 }
4556 // Check if the parent region is untied and build a return for the untied task.
4557 if (auto *Region =
4558 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4559 Region->emitUntiedSwitch(CGF);
4560 };
4561
4562 llvm::Value *DepWaitTaskArgs[7];
4563 if (!Data.Dependences.empty()) {
4564 DepWaitTaskArgs[0] = UpLoc;
4565 DepWaitTaskArgs[1] = ThreadID;
4566 DepWaitTaskArgs[2] = NumOfElements;
4567 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4568 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4569 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4570 DepWaitTaskArgs[6] =
4571 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4572 }
4573 auto &M = CGM.getModule();
4574 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4575 TaskEntry, &Data, &DepWaitTaskArgs,
4576 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4577 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4578 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4579 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4580 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
4581 // kmp_int32 has_no_wait); if dependence info is specified.
4582 if (!Data.Dependences.empty())
4583 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4584 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4585 DepWaitTaskArgs);
4586 // Call proxy_task_entry(gtid, new_task);
4587 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4588 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4589 Action.Enter(CGF);
4590 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4591 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4592 OutlinedFnArgs);
4593 };
4594
4595 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4596 // kmp_task_t *new_task);
4597 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4598 // kmp_task_t *new_task);
4599 RegionCodeGenTy RCG(CodeGen);
4600 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4601 M, OMPRTL___kmpc_omp_task_begin_if0),
4602 TaskArgs,
4603 OMPBuilder.getOrCreateRuntimeFunction(
4604 M, OMPRTL___kmpc_omp_task_complete_if0),
4605 TaskArgs);
4606 RCG.setAction(Action);
4607 RCG(CGF);
4608 };
4609
4610 if (IfCond) {
4611 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4612 } else {
4613 RegionCodeGenTy ThenRCG(ThenCodeGen);
4614 ThenRCG(CGF);
4615 }
4616}
4617
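// A sketch of the taskloop lowering below (values illustrative): for
//   #pragma omp taskloop grainsize(4)
// the emitted call is roughly
//   __kmpc_taskloop(&loc, gtid, new_task, /*if_val=*/1, &lb, &ub, st,
//                   /*nogroup=*/1, /*sched=*/1 /*Grainsize*/,
//                   /*grainsize=*/4, task_dup);
// num_tasks(n) selects sched == 2 (NumTasks) with n in the same slot;
// without either clause sched == 0 (NoSchedule) is passed.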
4618 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4619 const OMPLoopDirective &D,
4620 llvm::Function *TaskFunction,
4621 QualType SharedsTy, Address Shareds,
4622 const Expr *IfCond,
4623 const OMPTaskDataTy &Data) {
4624 if (!CGF.HaveInsertPoint())
4625 return;
4626 TaskResultTy Result =
4627 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4628 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4629 // libcall.
4630 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4631 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4632 // sched, kmp_uint64 grainsize, void *task_dup);
4633 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4634 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4635 llvm::Value *IfVal;
4636 if (IfCond) {
4637 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4638 /*isSigned=*/true);
4639 } else {
4640 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4641 }
4642
4643 LValue LBLVal = CGF.EmitLValueForField(
4644 Result.TDBase,
4645 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4646 const auto *LBVar =
4647 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4648 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4649 /*IsInitializer=*/true);
4650 LValue UBLVal = CGF.EmitLValueForField(
4651 Result.TDBase,
4652 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4653 const auto *UBVar =
4654 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4655 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4656 /*IsInitializer=*/true);
4657 LValue StLVal = CGF.EmitLValueForField(
4658 Result.TDBase,
4659 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4660 const auto *StVar =
4661 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4662 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4663 /*IsInitializer=*/true);
4664 // Store reductions address.
4665 LValue RedLVal = CGF.EmitLValueForField(
4666 Result.TDBase,
4667 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4668 if (Data.Reductions) {
4669 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4670 } else {
4671 CGF.EmitNullInitialization(RedLVal.getAddress(),
4672 CGF.getContext().VoidPtrTy);
4673 }
4674 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4675 llvm::Value *TaskArgs[] = {
4676 UpLoc,
4677 ThreadID,
4678 Result.NewTask,
4679 IfVal,
4680 LBLVal.getPointer(CGF),
4681 UBLVal.getPointer(CGF),
4682 CGF.EmitLoadOfScalar(StLVal, Loc),
4683 llvm::ConstantInt::getSigned(
4684 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4685 llvm::ConstantInt::getSigned(
4686 CGF.IntTy, Data.Schedule.getPointer()
4687 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4688 : NoSchedule),
4689 Data.Schedule.getPointer()
4690 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4691 /*isSigned=*/false)
4692 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4693 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4694 Result.TaskDupFn, CGF.VoidPtrTy)
4695 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4696 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4697 CGM.getModule(), OMPRTL___kmpc_taskloop),
4698 TaskArgs);
4699}
4700
4701/// Emit a reduction operation for each element of an array (required for
4702/// array sections): LHS op= RHS.
4703/// \param Type Type of array.
4704/// \param LHSVar Variable on the left side of the reduction operation
4705/// (references element of array in original variable).
4706/// \param RHSVar Variable on the right side of the reduction operation
4707/// (references element of array in original variable).
4708/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4709/// RHSVar.
4710 static void EmitOMPAggregateReduction(
4711 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4712 const VarDecl *RHSVar,
4713 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4714 const Expr *, const Expr *)> &RedOpGen,
4715 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4716 const Expr *UpExpr = nullptr) {
4717 // Perform element-by-element initialization.
4718 QualType ElementTy;
4719 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4720 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4721
4722 // Drill down to the base element type on both arrays.
4723 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4724 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4725
4726 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4727 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4728 // Cast from pointer to array type to pointer to single element.
4729 llvm::Value *LHSEnd =
4730 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4731 // The basic structure here is a while loop.
4732 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4733 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4734 llvm::Value *IsEmpty =
4735 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4736 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4737
4738 // Enter the loop body, making that address the current address.
4739 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4740 CGF.EmitBlock(BodyBB);
4741
4742 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4743
4744 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4745 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4746 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4747 Address RHSElementCurrent(
4748 RHSElementPHI, RHSAddr.getElementType(),
4749 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4750
4751 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4752 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4753 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4754 Address LHSElementCurrent(
4755 LHSElementPHI, LHSAddr.getElementType(),
4756 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4757
4758 // Emit copy.
4759 CodeGenFunction::OMPPrivateScope Scope(CGF);
4760 Scope.addPrivate(LHSVar, LHSElementCurrent);
4761 Scope.addPrivate(RHSVar, RHSElementCurrent);
4762 Scope.Privatize();
4763 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4764 Scope.ForceCleanup();
4765
4766 // Shift the address forward by one element.
4767 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4768 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4769 "omp.arraycpy.dest.element");
4770 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4771 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4772 "omp.arraycpy.src.element");
4773 // Check whether we've reached the end.
4774 llvm::Value *Done =
4775 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4776 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4777 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4778 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4779
4780 // Done.
4781 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4782}
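// For example, for reduction(+ : a) with 'int a[2]', the loop emitted above
// behaves roughly like
//   for (int *l = lhs, *r = rhs; l != lhs + 2; ++l, ++r)
//     *l = *l + *r;
// privatizing LHSVar/RHSVar to the current elements on each iteration.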
4783
4784/// Emit the reduction combiner. If the combiner is a simple expression, emit
4785/// it as is; otherwise treat it as the combiner of a UDR decl and emit it as
4786/// a call to the UDR combiner function.
4787 static void emitReductionCombiner(CodeGenFunction &CGF,
4788 const Expr *ReductionOp) {
4789 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4790 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4791 if (const auto *DRE =
4792 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4793 if (const auto *DRD =
4794 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4795 std::pair<llvm::Function *, llvm::Function *> Reduction =
4796 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4797 RValue Func = RValue::get(Reduction.first);
4798 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4799 CGF.EmitIgnoredExpr(ReductionOp);
4800 return;
4801 }
4802 CGF.EmitIgnoredExpr(ReductionOp);
4803}
4804
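// The helper below builds the reduce_func passed to __kmpc_reduce. For, say,
// reduction(+ : x) reduction(max : y) it produces roughly (names
// illustrative):
//   void reduction_func(void *lhs[2], void *rhs[2]) {
//     *(int *)lhs[0] = *(int *)lhs[0] + *(int *)rhs[0];
//     *(int *)lhs[1] = max(*(int *)lhs[1], *(int *)rhs[1]);
//   }
// For variably modified types the VLA size travels in an extra RedList slot
// and is rematerialized before the combiners run.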
4805 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4806 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4807 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4808 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4809 ASTContext &C = CGM.getContext();
4810
4811 // void reduction_func(void *LHSArg, void *RHSArg);
4812 FunctionArgList Args;
4813 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4814 ImplicitParamKind::Other);
4815 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4816 ImplicitParamKind::Other);
4817 Args.push_back(&LHSArg);
4818 Args.push_back(&RHSArg);
4819 const auto &CGFI =
4820 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4821 std::string Name = getReductionFuncName(ReducerName);
4822 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4823 llvm::GlobalValue::InternalLinkage, Name,
4824 &CGM.getModule());
4825 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4826 Fn->setDoesNotRecurse();
4827 CodeGenFunction CGF(CGM);
4828 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4829
4830 // Dst = (void*[n])(LHSArg);
4831 // Src = (void*[n])(RHSArg);
4832 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4833 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4834 ArgsElemType->getPointerTo()),
4835 ArgsElemType, CGF.getPointerAlign());
4836 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4837 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4838 ArgsElemType->getPointerTo()),
4839 ArgsElemType, CGF.getPointerAlign());
4840
4841 // ...
4842 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4843 // ...
4844 CodeGenFunction::OMPPrivateScope Scope(CGF);
4845 const auto *IPriv = Privates.begin();
4846 unsigned Idx = 0;
4847 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4848 const auto *RHSVar =
4849 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4850 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4851 const auto *LHSVar =
4852 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4853 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4854 QualType PrivTy = (*IPriv)->getType();
4855 if (PrivTy->isVariablyModifiedType()) {
4856 // Get array size and emit VLA type.
4857 ++Idx;
4858 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4859 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4860 const VariableArrayType *VLA =
4861 CGF.getContext().getAsVariableArrayType(PrivTy);
4862 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4863 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4864 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4865 CGF.EmitVariablyModifiedType(PrivTy);
4866 }
4867 }
4868 Scope.Privatize();
4869 IPriv = Privates.begin();
4870 const auto *ILHS = LHSExprs.begin();
4871 const auto *IRHS = RHSExprs.begin();
4872 for (const Expr *E : ReductionOps) {
4873 if ((*IPriv)->getType()->isArrayType()) {
4874 // Emit reduction for array section.
4875 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4876 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4877 EmitOMPAggregateReduction(
4878 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4879 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4880 emitReductionCombiner(CGF, E);
4881 });
4882 } else {
4883 // Emit reduction for array subscript or single variable.
4884 emitReductionCombiner(CGF, E);
4885 }
4886 ++IPriv;
4887 ++ILHS;
4888 ++IRHS;
4889 }
4890 Scope.ForceCleanup();
4891 CGF.FinishFunction();
4892 return Fn;
4893}
4894
4895 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4896 const Expr *ReductionOp,
4897 const Expr *PrivateRef,
4898 const DeclRefExpr *LHS,
4899 const DeclRefExpr *RHS) {
4900 if (PrivateRef->getType()->isArrayType()) {
4901 // Emit reduction for array section.
4902 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4903 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4904 EmitOMPAggregateReduction(
4905 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4906 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4907 emitReductionCombiner(CGF, ReductionOp);
4908 });
4909 } else {
4910 // Emit reduction for array subscript or single variable.
4911 emitReductionCombiner(CGF, ReductionOp);
4912 }
4913}
4914
4915 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4916 ArrayRef<const Expr *> Privates,
4917 ArrayRef<const Expr *> LHSExprs,
4918 ArrayRef<const Expr *> RHSExprs,
4919 ArrayRef<const Expr *> ReductionOps,
4920 ReductionOptionsTy Options) {
4921 if (!CGF.HaveInsertPoint())
4922 return;
4923
4924 bool WithNowait = Options.WithNowait;
4925 bool SimpleReduction = Options.SimpleReduction;
4926
4927 // The following code should be emitted for the reduction:
4928 //
4929 // static kmp_critical_name lock = { 0 };
4930 //
4931 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4932 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4933 // ...
4934 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4935 // *(Type<n>-1*)rhs[<n>-1]);
4936 // }
4937 //
4938 // ...
4939 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4940 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4941 // RedList, reduce_func, &<lock>)) {
4942 // case 1:
4943 // ...
4944 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4945 // ...
4946 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4947 // break;
4948 // case 2:
4949 // ...
4950 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4951 // ...
4952 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4953 // break;
4954 // default:;
4955 // }
4956 //
4957 // If SimpleReduction is true, only the following code is generated:
4958 // ...
4959 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4960 // ...
4961
4962 ASTContext &C = CGM.getContext();
4963
4964 if (SimpleReduction) {
4965 CodeGenFunction::RunCleanupsScope Scope(CGF);
4966 const auto *IPriv = Privates.begin();
4967 const auto *ILHS = LHSExprs.begin();
4968 const auto *IRHS = RHSExprs.begin();
4969 for (const Expr *E : ReductionOps) {
4970 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4971 cast<DeclRefExpr>(*IRHS));
4972 ++IPriv;
4973 ++ILHS;
4974 ++IRHS;
4975 }
4976 return;
4977 }
4978
4979 // 1. Build a list of reduction variables.
4980 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4981 auto Size = RHSExprs.size();
4982 for (const Expr *E : Privates) {
4983 if (E->getType()->isVariablyModifiedType())
4984 // Reserve a slot for the array size.
4985 ++Size;
4986 }
4987 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4988 QualType ReductionArrayTy = C.getConstantArrayType(
4989 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4990 /*IndexTypeQuals=*/0);
4991 RawAddress ReductionList =
4992 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4993 const auto *IPriv = Privates.begin();
4994 unsigned Idx = 0;
4995 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4996 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4997 CGF.Builder.CreateStore(
4998 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4999 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5000 Elem);
5001 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5002 // Store array size.
5003 ++Idx;
5004 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5005 llvm::Value *Size = CGF.Builder.CreateIntCast(
5006 CGF.getVLASize(
5007 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5008 .NumElts,
5009 CGF.SizeTy, /*isSigned=*/false);
5010 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5011 Elem);
5012 }
5013 }
5014
5015 // 2. Emit reduce_func().
5016 llvm::Function *ReductionFn = emitReductionFunction(
5017 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5018 Privates, LHSExprs, RHSExprs, ReductionOps);
5019
5020 // 3. Create static kmp_critical_name lock = { 0 };
5021 std::string Name = getName({"reduction"});
5022 llvm::Value *Lock = getCriticalRegionLock(Name);
5023
5024 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5025 // RedList, reduce_func, &<lock>);
5026 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5027 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5028 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5029 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5030 ReductionList.getPointer(), CGF.VoidPtrTy);
5031 llvm::Value *Args[] = {
5032 IdentTLoc, // ident_t *<loc>
5033 ThreadId, // i32 <gtid>
5034 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5035 ReductionArrayTySize, // size_type sizeof(RedList)
5036 RL, // void *RedList
5037 ReductionFn, // void (*) (void *, void *) <reduce_func>
5038 Lock // kmp_critical_name *&<lock>
5039 };
5040 llvm::Value *Res = CGF.EmitRuntimeCall(
5041 OMPBuilder.getOrCreateRuntimeFunction(
5042 CGM.getModule(),
5043 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5044 Args);
5045
5046 // 5. Build switch(res)
5047 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5048 llvm::SwitchInst *SwInst =
5049 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5050
5051 // 6. Build case 1:
5052 // ...
5053 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5054 // ...
5055 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5056 // break;
5057 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5058 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5059 CGF.EmitBlock(Case1BB);
5060
5061 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5062 llvm::Value *EndArgs[] = {
5063 IdentTLoc, // ident_t *<loc>
5064 ThreadId, // i32 <gtid>
5065 Lock // kmp_critical_name *&<lock>
5066 };
5067 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5068 CodeGenFunction &CGF, PrePostActionTy &Action) {
5069 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5070 const auto *IPriv = Privates.begin();
5071 const auto *ILHS = LHSExprs.begin();
5072 const auto *IRHS = RHSExprs.begin();
5073 for (const Expr *E : ReductionOps) {
5074 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5075 cast<DeclRefExpr>(*IRHS));
5076 ++IPriv;
5077 ++ILHS;
5078 ++IRHS;
5079 }
5080 };
5081 RegionCodeGenTy RCG(CodeGen);
5082 CommonActionTy Action(
5083 nullptr, std::nullopt,
5084 OMPBuilder.getOrCreateRuntimeFunction(
5085 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5086 : OMPRTL___kmpc_end_reduce),
5087 EndArgs);
5088 RCG.setAction(Action);
5089 RCG(CGF);
5090
5091 CGF.EmitBranch(DefaultBB);
5092
5093 // 7. Build case 2:
5094 // ...
5095 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5096 // ...
5097 // break;
5098 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5099 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5100 CGF.EmitBlock(Case2BB);
5101
5102 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5103 CodeGenFunction &CGF, PrePostActionTy &Action) {
5104 const auto *ILHS = LHSExprs.begin();
5105 const auto *IRHS = RHSExprs.begin();
5106 const auto *IPriv = Privates.begin();
5107 for (const Expr *E : ReductionOps) {
5108 const Expr *XExpr = nullptr;
5109 const Expr *EExpr = nullptr;
5110 const Expr *UpExpr = nullptr;
5111 BinaryOperatorKind BO = BO_Comma;
5112 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5113 if (BO->getOpcode() == BO_Assign) {
5114 XExpr = BO->getLHS();
5115 UpExpr = BO->getRHS();
5116 }
5117 }
5118 // Try to emit update expression as a simple atomic.
5119 const Expr *RHSExpr = UpExpr;
5120 if (RHSExpr) {
5121 // Analyze RHS part of the whole expression.
5122 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5123 RHSExpr->IgnoreParenImpCasts())) {
5124 // If this is a conditional operator, analyze its condition for
5125 // min/max reduction operator.
5126 RHSExpr = ACO->getCond();
5127 }
5128 if (const auto *BORHS =
5129 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5130 EExpr = BORHS->getRHS();
5131 BO = BORHS->getOpcode();
5132 }
5133 }
5134 if (XExpr) {
5135 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5136 auto &&AtomicRedGen = [BO, VD,
5137 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5138 const Expr *EExpr, const Expr *UpExpr) {
5139 LValue X = CGF.EmitLValue(XExpr);
5140 RValue E;
5141 if (EExpr)
5142 E = CGF.EmitAnyExpr(EExpr);
5143 CGF.EmitOMPAtomicSimpleUpdateExpr(
5144 X, E, BO, /*IsXLHSInRHSPart=*/true,
5145 llvm::AtomicOrdering::Monotonic, Loc,
5146 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5147 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5148 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5149 CGF.emitOMPSimpleStore(
5150 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5151 VD->getType().getNonReferenceType(), Loc);
5152 PrivateScope.addPrivate(VD, LHSTemp);
5153 (void)PrivateScope.Privatize();
5154 return CGF.EmitAnyExpr(UpExpr);
5155 });
5156 };
5157 if ((*IPriv)->getType()->isArrayType()) {
5158 // Emit atomic reduction for array section.
5159 const auto *RHSVar =
5160 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5161 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5162 AtomicRedGen, XExpr, EExpr, UpExpr);
5163 } else {
5164 // Emit atomic reduction for array subscript or single variable.
5165 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5166 }
5167 } else {
5168 // Emit as a critical region.
5169 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5170 const Expr *, const Expr *) {
5171 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5172 std::string Name = RT.getName({"atomic_reduction"});
5173 RT.emitCriticalRegion(
5174 CGF, Name,
5175 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5176 Action.Enter(CGF);
5177 emitReductionCombiner(CGF, E);
5178 },
5179 Loc);
5180 };
5181 if ((*IPriv)->getType()->isArrayType()) {
5182 const auto *LHSVar =
5183 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5184 const auto *RHSVar =
5185 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5186 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5187 CritRedGen);
5188 } else {
5189 CritRedGen(CGF, nullptr, nullptr, nullptr);
5190 }
5191 }
5192 ++ILHS;
5193 ++IRHS;
5194 ++IPriv;
5195 }
5196 };
5197 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5198 if (!WithNowait) {
5199 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5200 llvm::Value *EndArgs[] = {
5201 IdentTLoc, // ident_t *<loc>
5202 ThreadId, // i32 <gtid>
5203 Lock // kmp_critical_name *&<lock>
5204 };
5205 CommonActionTy Action(nullptr, std::nullopt,
5206 OMPBuilder.getOrCreateRuntimeFunction(
5207 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5208 EndArgs);
5209 AtomicRCG.setAction(Action);
5210 AtomicRCG(CGF);
5211 } else {
5212 AtomicRCG(CGF);
5213 }
5214
5215 CGF.EmitBranch(DefaultBB);
5216 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5217}
5218
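// For example, with Prefix "reduction_size" and a variable 'x', the helper
// below produces a name of the form "reduction_size.x_<rawloc>", where
// <rawloc> is the raw encoding of the declaration's start location
// (illustrative; the exact spelling depends on the name mangling).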
5219/// Generates unique name for artificial threadprivate variables.
5220/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5221static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5222 const Expr *Ref) {
5223 SmallString<256> Buffer;
5224 llvm::raw_svector_ostream Out(Buffer);
5225 const clang::DeclRefExpr *DE;
5226 const VarDecl *D = ::getBaseDecl(Ref, DE);
5227 if (!D)
5228 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5229 D = D->getCanonicalDecl();
5230 std::string Name = CGM.getOpenMPRuntime().getName(
5231 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5232 Out << Prefix << Name << "_"
5233 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5234 return std::string(Out.str());
5235}
5236
5237/// Emits reduction initializer function:
5238/// \code
5239/// void @.red_init(void* %arg, void* %orig) {
5240/// %0 = bitcast void* %arg to <type>*
5241/// store <type> <init>, <type>* %0
5242/// ret void
5243/// }
5244/// \endcode
5245static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5246 SourceLocation Loc,
5247 ReductionCodeGen &RCG, unsigned N) {
5248 ASTContext &C = CGM.getContext();
5249 QualType VoidPtrTy = C.VoidPtrTy;
5250 VoidPtrTy.addRestrict();
5251 FunctionArgList Args;
5252 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5253 ImplicitParamKind::Other);
5254 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5255 ImplicitParamKind::Other);
5256 Args.emplace_back(&Param);
5257 Args.emplace_back(&ParamOrig);
5258 const auto &FnInfo =
5259 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5260 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5261 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5262 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5263 Name, &CGM.getModule());
5264 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5265 Fn->setDoesNotRecurse();
5266 CodeGenFunction CGF(CGM);
5267 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5268 QualType PrivateType = RCG.getPrivateType(N);
5269 Address PrivateAddr = CGF.EmitLoadOfPointer(
5270 CGF.GetAddrOfLocalVar(&Param).withElementType(
5271 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5272 C.getPointerType(PrivateType)->castAs<PointerType>());
5273 llvm::Value *Size = nullptr;
5274 // If the size of the reduction item is non-constant, load it from global
5275 // threadprivate variable.
5276 if (RCG.getSizes(N).second) {
5277 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5278 CGF, CGM.getContext().getSizeType(),
5279 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5280 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5281 CGM.getContext().getSizeType(), Loc);
5282 }
5283 RCG.emitAggregateType(CGF, N, Size);
5284 Address OrigAddr = Address::invalid();
5285 // If the initializer uses the initializer from the declare reduction
5286 // construct, emit a pointer to the address of the original reduction item
5287 // (required by the reduction initializer).
5288 if (RCG.usesReductionInitializer(N)) {
5289 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5290 OrigAddr = CGF.EmitLoadOfPointer(
5291 SharedAddr,
5292 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5293 }
5294 // Emit the initializer:
5295 // %0 = bitcast void* %arg to <type>*
5296 // store <type> <init>, <type>* %0
5297 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5298 [](CodeGenFunction &) { return false; });
5299 CGF.FinishFunction();
5300 return Fn;
5301}
5302
5303/// Emits reduction combiner function:
5304/// \code
5305/// void @.red_comb(void* %arg0, void* %arg1) {
5306/// %lhs = bitcast void* %arg0 to <type>*
5307/// %rhs = bitcast void* %arg1 to <type>*
5308/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5309/// store <type> %2, <type>* %lhs
5310/// ret void
5311/// }
5312/// \endcode
5313static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5314 SourceLocation Loc,
5315 ReductionCodeGen &RCG, unsigned N,
5316 const Expr *ReductionOp,
5317 const Expr *LHS, const Expr *RHS,
5318 const Expr *PrivateRef) {
5319 ASTContext &C = CGM.getContext();
5320 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5321 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5322 FunctionArgList Args;
5323 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5324 C.VoidPtrTy, ImplicitParamKind::Other);
5325 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5326 ImplicitParamKind::Other);
5327 Args.emplace_back(&ParamInOut);
5328 Args.emplace_back(&ParamIn);
5329 const auto &FnInfo =
5330 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5331 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5332 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5333 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5334 Name, &CGM.getModule());
5335 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5336 Fn->setDoesNotRecurse();
5337 CodeGenFunction CGF(CGM);
5338 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5339 llvm::Value *Size = nullptr;
5340 // If the size of the reduction item is non-constant, load it from global
5341 // threadprivate variable.
5342 if (RCG.getSizes(N).second) {
5343 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5344 CGF, CGM.getContext().getSizeType(),
5345 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5346 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5347 CGM.getContext().getSizeType(), Loc);
5348 }
5349 RCG.emitAggregateType(CGF, N, Size);
5350 // Remap lhs and rhs variables to the addresses of the function arguments.
5351 // %lhs = bitcast void* %arg0 to <type>*
5352 // %rhs = bitcast void* %arg1 to <type>*
5353 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5354 PrivateScope.addPrivate(
5355 LHSVD,
5356 // Pull out the pointer to the variable.
5357 CGF.EmitLoadOfPointer(
5358 CGF.GetAddrOfLocalVar(&ParamInOut)
5359 .withElementType(
5360 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5361 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5362 PrivateScope.addPrivate(
5363 RHSVD,
5364 // Pull out the pointer to the variable.
5365 CGF.EmitLoadOfPointer(
5366 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5367 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5368 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5369 PrivateScope.Privatize();
5370 // Emit the combiner body:
5371 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5372 // store <type> %2, <type>* %lhs
5373 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5374 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5375 cast<DeclRefExpr>(RHS));
5376 CGF.FinishFunction();
5377 return Fn;
5378}
5379
5380/// Emits reduction finalizer function:
5381/// \code
5382/// void @.red_fini(void* %arg) {
5383/// %0 = bitcast void* %arg to <type>*
5384/// <destroy>(<type>* %0)
5385/// ret void
5386/// }
5387/// \endcode
5388static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5389 SourceLocation Loc,
5390 ReductionCodeGen &RCG, unsigned N) {
5391 if (!RCG.needCleanups(N))
5392 return nullptr;
5393 ASTContext &C = CGM.getContext();
5394 FunctionArgList Args;
5395 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5396 ImplicitParamKind::Other);
5397 Args.emplace_back(&Param);
5398 const auto &FnInfo =
5399 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5400 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5401 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5402 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5403 Name, &CGM.getModule());
5404 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5405 Fn->setDoesNotRecurse();
5406 CodeGenFunction CGF(CGM);
5407 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5408 Address PrivateAddr = CGF.EmitLoadOfPointer(
5409 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5410 llvm::Value *Size = nullptr;
5411 // If the size of the reduction item is non-constant, load it from global
5412 // threadprivate variable.
5413 if (RCG.getSizes(N).second) {
5414 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5415 CGF, CGM.getContext().getSizeType(),
5416 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5417 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5418 CGM.getContext().getSizeType(), Loc);
5419 }
5420 RCG.emitAggregateType(CGF, N, Size);
5421 // Emit the finalizer body:
5422 // <destroy>(<type>* %0)
5423 RCG.emitCleanups(CGF, N, PrivateAddr);
5424 CGF.FinishFunction(Loc);
5425 return Fn;
5426}
5427
5428 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5429 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5430 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5431 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5432 return nullptr;
5433
5434 // Build typedef struct:
5435 // kmp_taskred_input {
5436 // void *reduce_shar; // shared reduction item
5437 // void *reduce_orig; // original reduction item used for initialization
5438 // size_t reduce_size; // size of data item
5439 // void *reduce_init; // data initialization routine
5440 // void *reduce_fini; // data finalization routine
5441 // void *reduce_comb; // data combiner routine
5442 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5443 // } kmp_taskred_input_t;
5444 ASTContext &C = CGM.getContext();
5445 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5446 RD->startDefinition();
5447 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5448 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5449 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5450 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5451 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5452 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5453 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5454 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5455 RD->completeDefinition();
5456 QualType RDType = C.getRecordType(RD);
5457 unsigned Size = Data.ReductionVars.size();
5458 llvm::APInt ArraySize(/*numBits=*/64, Size);
5459 QualType ArrayRDType =
5460 C.getConstantArrayType(RDType, ArraySize, nullptr,
5461 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5462 // kmp_task_red_input_t .rd_input.[Size];
5463 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5464 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5465 Data.ReductionCopies, Data.ReductionOps);
5466 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5467 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5468 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5469 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5470 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5471 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5472 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5473 ".rd_input.gep.");
5474 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5475 // ElemLVal.reduce_shar = &Shareds[Cnt];
5476 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5477 RCG.emitSharedOrigLValue(CGF, Cnt);
5478 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5479 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5480 // ElemLVal.reduce_orig = &Origs[Cnt];
5481 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5482 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5483 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5484 RCG.emitAggregateType(CGF, Cnt);
5485 llvm::Value *SizeValInChars;
5486 llvm::Value *SizeVal;
5487 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5488 // We use delayed creation/initialization for VLAs and array sections. This
5489 // is required because the runtime does not provide a way to pass the sizes
5490 // of VLAs/array sections to the initializer/combiner/finalizer functions.
5491 // Instead, threadprivate global variables are used to store these values,
5492 // and the functions read them from there.
5493 bool DelayedCreation = !!SizeVal;
5494 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5495 /*isSigned=*/false);
5496 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5497 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5498 // ElemLVal.reduce_init = init;
5499 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5500 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5501 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5502 // ElemLVal.reduce_fini = fini;
5503 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5504 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5505 llvm::Value *FiniAddr =
5506 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5507 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5508 // ElemLVal.reduce_comb = comb;
5509 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5510 llvm::Value *CombAddr = emitReduceCombFunction(
5511 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5512 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5513 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5514 // ElemLVal.flags = DelayedCreation ? 1 : 0;
5515 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5516 if (DelayedCreation) {
5517 CGF.EmitStoreOfScalar(
5518 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5519 FlagsLVal);
5520 } else
5521 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5522 }
5523 if (Data.IsReductionWithTaskMod) {
5524 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5525 // is_ws, int num, void *data);
5526 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5527 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5528 CGM.IntTy, /*isSigned=*/true);
5529 llvm::Value *Args[] = {
5530 IdentTLoc, GTid,
5531 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5532 /*isSigned=*/true),
5533 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5534 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5535 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5536 return CGF.EmitRuntimeCall(
5537 OMPBuilder.getOrCreateRuntimeFunction(
5538 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5539 Args);
5540 }
5541 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5542 llvm::Value *Args[] = {
5543 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5544 /*isSigned=*/true),
5545 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5546 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5547 CGM.VoidPtrTy)};
5548 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5549 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5550 Args);
5551}
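// Example (illustrative): for '#pragma omp taskgroup task_reduction(+ : x)'
// the code above fills one kmp_taskred_input_t entry describing 'x' and
// calls __kmpc_taskred_init(gtid, /*num=*/1, &.rd_input.); when the
// reduction uses the 'task' modifier, __kmpc_taskred_modifier_init is
// emitted instead, with is_ws distinguishing worksharing regions.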
5552
5553 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5554 SourceLocation Loc,
5555 bool IsWorksharingReduction) {
5556 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5557 // is_ws, int num, void *data);
5558 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5559 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5560 CGM.IntTy, /*isSigned=*/true);
5561 llvm::Value *Args[] = {IdentTLoc, GTid,
5562 llvm::ConstantInt::get(CGM.IntTy,
5563 IsWorksharingReduction ? 1 : 0,
5564 /*isSigned=*/true)};
5565 (void)CGF.EmitRuntimeCall(
5566 OMPBuilder.getOrCreateRuntimeFunction(
5567 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5568 Args);
5569}
5570
5571 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5572 SourceLocation Loc,
5573 ReductionCodeGen &RCG,
5574 unsigned N) {
5575 auto Sizes = RCG.getSizes(N);
5576 // Emit a threadprivate global variable if the size is non-constant
5577 // (Sizes.second != nullptr).
5578 if (Sizes.second) {
5579 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5580 /*isSigned=*/false);
5581 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5582 CGF, CGM.getContext().getSizeType(),
5583 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5584 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5585 }
5586}
5587
5588 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5589 SourceLocation Loc,
5590 llvm::Value *ReductionsPtr,
5591 LValue SharedLVal) {
5592 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5593 // *d);
5594 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5595 CGM.IntTy,
5596 /*isSigned=*/true),
5597 ReductionsPtr,
5598 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5599 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5600 return Address(
5601 CGF.EmitRuntimeCall(
5602 OMPBuilder.getOrCreateRuntimeFunction(
5603 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5604 Args),
5605 CGF.Int8Ty, SharedLVal.getAlignment());
5606}
5607
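// A plain '#pragma omp taskwait' lowers to a single call
//   __kmpc_omp_taskwait(&loc, gtid);
// while 'taskwait' with depend clauses (OpenMP 5.x) instead lowers to
// __kmpc_omp_taskwait_deps_51 over the emitted dependence array, as the
// function below shows.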
5608 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5609 const OMPTaskDataTy &Data) {
5610 if (!CGF.HaveInsertPoint())
5611 return;
5612
5613 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5614 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5615 OMPBuilder.createTaskwait(CGF.Builder);
5616 } else {
5617 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5618 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5619 auto &M = CGM.getModule();
5620 Address DependenciesArray = Address::invalid();
5621 llvm::Value *NumOfElements;
5622 std::tie(NumOfElements, DependenciesArray) =
5623 emitDependClause(CGF, Data.Dependences, Loc);
5624 if (!Data.Dependences.empty()) {
5625 llvm::Value *DepWaitTaskArgs[7];
5626 DepWaitTaskArgs[0] = UpLoc;
5627 DepWaitTaskArgs[1] = ThreadID;
5628 DepWaitTaskArgs[2] = NumOfElements;
5629 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5630 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5631 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5632 DepWaitTaskArgs[6] =
5633 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5634
5635 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5636
5637 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5638 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5639 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5640 // kmp_int32 has_no_wait); if dependence info is specified.
5641 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5642 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5643 DepWaitTaskArgs);
5644
5645 } else {
5646
5647 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5648 // global_tid);
5649 llvm::Value *Args[] = {UpLoc, ThreadID};
5650 // Ignore return result until untied tasks are supported.
5651 CGF.EmitRuntimeCall(
5652 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5653 Args);
5654 }
5655 }
5656
5657 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5658 Region->emitUntiedSwitch(CGF);
5659}
5660
5661 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5662 OpenMPDirectiveKind InnerKind,
5663 const RegionCodeGenTy &CodeGen,
5664 bool HasCancel) {
5665 if (!CGF.HaveInsertPoint())
5666 return;
5667 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5668 InnerKind != OMPD_critical &&
5669 InnerKind != OMPD_master &&
5670 InnerKind != OMPD_masked);
5671 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5672}
5673
5674namespace {
5675enum RTCancelKind {
5676 CancelNoreq = 0,
5677 CancelParallel = 1,
5678 CancelLoop = 2,
5679 CancelSections = 3,
5680 CancelTaskgroup = 4
5681};
5682} // anonymous namespace
5683
5684static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5685 RTCancelKind CancelKind = CancelNoreq;
5686 if (CancelRegion == OMPD_parallel)
5687 CancelKind = CancelParallel;
5688 else if (CancelRegion == OMPD_for)
5689 CancelKind = CancelLoop;
5690 else if (CancelRegion == OMPD_sections)
5691 CancelKind = CancelSections;
5692 else {
5693 assert(CancelRegion == OMPD_taskgroup);
5694 CancelKind = CancelTaskgroup;
5695 }
5696 return CancelKind;
5697}
5698
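// Example: '#pragma omp cancellation point sections' maps to cncl_kind == 3
// (CancelSections) above, so the emitted check is roughly
//   if (__kmpc_cancellationpoint(&loc, gtid, 3) != 0)
//     goto <exit of the sections construct>;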
5699 void CGOpenMPRuntime::emitCancellationPointCall(
5700 CodeGenFunction &CGF, SourceLocation Loc,
5701 OpenMPDirectiveKind CancelRegion) {
5702 if (!CGF.HaveInsertPoint())
5703 return;
5704 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5705 // global_tid, kmp_int32 cncl_kind);
5706 if (auto *OMPRegionInfo =
5707 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5708 // For 'cancellation point taskgroup', the task region info may not have a
5709 // cancel. This may instead happen in another adjacent task.
5710 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5711 llvm::Value *Args[] = {
5712 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5713 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5714 // The result is checked below to decide whether to exit the construct.
5715 llvm::Value *Result = CGF.EmitRuntimeCall(
5716 OMPBuilder.getOrCreateRuntimeFunction(
5717 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5718 Args);
5719 // if (__kmpc_cancellationpoint()) {
5720 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5721 // exit from construct;
5722 // }
5723 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5724 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5725 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5726 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5727 CGF.EmitBlock(ExitBB);
5728 if (CancelRegion == OMPD_parallel)
5729 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5730 // exit from construct;
5731 CodeGenFunction::JumpDest CancelDest =
5732 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5733 CGF.EmitBranchThroughCleanup(CancelDest);
5734 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5735 }
5736 }
5737}
5738
5739 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5740 const Expr *IfCond,
5741 OpenMPDirectiveKind CancelRegion) {
5742 if (!CGF.HaveInsertPoint())
5743 return;
5744 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5745 // kmp_int32 cncl_kind);
5746 auto &M = CGM.getModule();
5747 if (auto *OMPRegionInfo =
5748 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5749 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5750 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5751 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5752 llvm::Value *Args[] = {
5753 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5754 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5755 // The result is checked below to decide whether to exit the construct.
5756 llvm::Value *Result = CGF.EmitRuntimeCall(
5757 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5758 // if (__kmpc_cancel()) {
5759 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5760 // exit from construct;
5761 // }
5762 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5763 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5764 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5765 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5766 CGF.EmitBlock(ExitBB);
5767 if (CancelRegion == OMPD_parallel)
5768 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5769 // exit from construct;
5770 CodeGenFunction::JumpDest CancelDest =
5771 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5772 CGF.EmitBranchThroughCleanup(CancelDest);
5773 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5774 };
5775 if (IfCond) {
5776 emitIfClause(CGF, IfCond, ThenGen,
5777 [](CodeGenFunction &, PrePostActionTy &) {});
5778 } else {
5779 RegionCodeGenTy ThenRCG(ThenGen);
5780 ThenRCG(CGF);
5781 }
5782 }
5783}
5784
5785namespace {
5786/// Cleanup action for uses_allocators support.
5787class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5788 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5789
5790public:
5791 OMPUsesAllocatorsActionTy(
5792 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5793 : Allocators(Allocators) {}
5794 void Enter(CodeGenFunction &CGF) override {
5795 if (!CGF.HaveInsertPoint())
5796 return;
5797 for (const auto &AllocatorData : Allocators) {
5798 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5799 CGF, AllocatorData.first, AllocatorData.second);
5800 }
5801 }
5802 void Exit(CodeGenFunction &CGF) override {
5803 if (!CGF.HaveInsertPoint())
5804 return;
5805 for (const auto &AllocatorData : Allocators) {
5806 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5807 AllocatorData.first);
5808 }
5809 }
5810};
5811} // namespace
5812
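// Sketch (illustrative names): for
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// the action above wraps the target region so that __kmpc_init_allocator is
// called for 'my_alloc' on entry and __kmpc_destroy_allocator on exit.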
5813 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5814 const OMPExecutableDirective &D, StringRef ParentName,
5815 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5816 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5817 assert(!ParentName.empty() && "Invalid target entry parent name!");
5818 HasEmittedTargetRegion = true;
5819 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5820 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5821 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5822 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5823 if (!D.AllocatorTraits)
5824 continue;
5825 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5826 }
5827 }
5828 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5829 CodeGen.setAction(UsesAllocatorAction);
5830 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5831 IsOffloadEntry, CodeGen);
5832}
5833
5834 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5835 const Expr *Allocator,
5836 const Expr *AllocatorTraits) {
5837 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5838 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5839 // Use default memspace handle.
5840 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5841 llvm::Value *NumTraits = llvm::ConstantInt::get(
5842 CGF.IntTy, cast<ConstantArrayType>(
5843 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5844 ->getSize()
5845 .getLimitedValue());
5846 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5847 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5848 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5849 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5850 AllocatorTraitsLVal.getBaseInfo(),
5851 AllocatorTraitsLVal.getTBAAInfo());
5852 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5853
5854 llvm::Value *AllocatorVal =
5855 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5856 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5857 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5858 // Store to allocator.
5859 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5860 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5861 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5862 AllocatorVal =
5863 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5864 Allocator->getType(), Allocator->getExprLoc());
5865 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5866}
5867
5868 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5869 const Expr *Allocator) {
5870 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5871 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5872 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5873 llvm::Value *AllocatorVal =
5874 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5875 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5876 CGF.getContext().VoidPtrTy,
5877 Allocator->getExprLoc());
5878 (void)CGF.EmitRuntimeCall(
5879 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5880 OMPRTL___kmpc_destroy_allocator),
5881 {ThreadId, AllocatorVal});
5882}
5883
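// Example (illustrative): an ompx_attribute carrying
// __launch_bounds__(128, 2) clamps MaxThreadsVal to at most 128 and raises
// MinTeamsVal to at least 2 in the helper below; AMDGPU flat work-group-size
// attributes feed the thread bounds in the same way.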
5884 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5885 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5886 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5887 int32_t &MaxTeamsVal) {
5888
5889 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5890 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5891 /*UpperBoundOnly=*/true);
5892
5893 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5894 for (auto *A : C->getAttrs()) {
5895 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5896 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5897 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5898 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5899 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5900 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5901 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5902 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5903 &AttrMaxThreadsVal);
5904 else
5905 continue;
5906
5907 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5908 if (AttrMaxThreadsVal > 0)
5909 MaxThreadsVal = MaxThreadsVal > 0
5910 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5911 : AttrMaxThreadsVal;
5912 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5913 if (AttrMaxBlocksVal > 0)
5914 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5915 : AttrMaxBlocksVal;
5916 }
5917 }
5918}
5919
5920 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5921 const OMPExecutableDirective &D, StringRef ParentName,
5922 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5923 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5924
5925 llvm::TargetRegionEntryInfo EntryInfo =
5926 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5927
5928 CodeGenFunction CGF(CGM, true);
5929 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5930 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5931 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5932
5933 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5934 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5935 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5936 };
5937
5938 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5939 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5940
5941 if (!OutlinedFn)
5942 return;
5943
5944 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5945
5946 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5947 for (auto *A : C->getAttrs()) {
5948 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5949 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5950 }
5951 }
5952}
5953
5954/// Checks if the expression is constant or does not have non-trivial function
5955/// calls.
5956static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5957 // We can skip constant expressions.
5958 // We can skip expressions with trivial calls or simple expressions.
5959 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5960 !E->hasNonTrivialCall(Ctx)) &&
5961 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5962}
5963
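// Example (roughly): for a captured body
//   { ; int unused; #pragma omp teams ... }
// the helper below skips the null statement and the unused local declaration
// and returns the teams directive as the single child; with two or more
// non-ignorable statements it returns nullptr instead.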
5964 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5965 const Stmt *Body) {
5966 const Stmt *Child = Body->IgnoreContainers();
5967 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5968 Child = nullptr;
5969 for (const Stmt *S : C->body()) {
5970 if (const auto *E = dyn_cast<Expr>(S)) {
5971 if (isTrivial(Ctx, E))
5972 continue;
5973 }
5974 // Some of the statements can be ignored.
5975 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5976 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5977 continue;
5978 // Analyze declarations.
5979 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5980 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5981 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5982 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5983 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5984 isa<UsingDirectiveDecl>(D) ||
5985 isa<OMPDeclareReductionDecl>(D) ||
5986 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
5987 return true;
5988 const auto *VD = dyn_cast<VarDecl>(D);
5989 if (!VD)
5990 return false;
5991 return VD->hasGlobalStorage() || !VD->isUsed();
5992 }))
5993 continue;
5994 }
5996 // Found multiple children: cannot return a single child.
5996 if (Child)
5997 return nullptr;
5998 Child = S;
5999 }
6000 if (Child)
6001 Child = Child->IgnoreContainers();
6002 }
6003 return Child;
6004}
6005
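// Illustrative results for the helper below:
//   #pragma omp target teams num_teams(8)     -> MinTeamsVal = MaxTeamsVal = 8
//   #pragma omp target parallel               -> MinTeamsVal = MaxTeamsVal = 1
//   plain #pragma omp target, no nested teams -> both set to -1
// A non-null return value is the num_teams clause expression itself.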
6006 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6007 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6008 int32_t &MaxTeamsVal) {
6009
6010 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6011 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6012 "Expected target-based executable directive.");
6013 switch (DirectiveKind) {
6014 case OMPD_target: {
6015 const auto *CS = D.getInnermostCapturedStmt();
6016 const auto *Body =
6017 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6018 const Stmt *ChildStmt =
6019 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6020 if (const auto *NestedDir =
6021 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6022 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6023 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6024 const Expr *NumTeams =
6025 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6026 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6027 if (auto Constant =
6028 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6029 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6030 return NumTeams;
6031 }
6032 MinTeamsVal = MaxTeamsVal = 0;
6033 return nullptr;
6034 }
6035 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6036 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6037 MinTeamsVal = MaxTeamsVal = 1;
6038 return nullptr;
6039 }
6040 MinTeamsVal = MaxTeamsVal = 1;
6041 return nullptr;
6042 }
6043 // A value of -1 is used to check if we need to emit no teams region
6044 MinTeamsVal = MaxTeamsVal = -1;
6045 return nullptr;
6046 }
6047 case OMPD_target_teams_loop:
6048 case OMPD_target_teams:
6049 case OMPD_target_teams_distribute:
6050 case OMPD_target_teams_distribute_simd:
6051 case OMPD_target_teams_distribute_parallel_for:
6052 case OMPD_target_teams_distribute_parallel_for_simd: {
6054 const Expr *NumTeams =
6055 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6056 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6057 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6058 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6059 return NumTeams;
6060 }
6061 MinTeamsVal = MaxTeamsVal = 0;
6062 return nullptr;
6063 }
6064 case OMPD_target_parallel:
6065 case OMPD_target_parallel_for:
6066 case OMPD_target_parallel_for_simd:
6067 case OMPD_target_parallel_loop:
6068 case OMPD_target_simd:
6069 MinTeamsVal = MaxTeamsVal = 1;
6070 return nullptr;
6071 case OMPD_parallel:
6072 case OMPD_for:
6073 case OMPD_parallel_for:
6074 case OMPD_parallel_loop:
6075 case OMPD_parallel_master:
6076 case OMPD_parallel_sections:
6077 case OMPD_for_simd:
6078 case OMPD_parallel_for_simd:
6079 case OMPD_cancel:
6080 case OMPD_cancellation_point:
6081 case OMPD_ordered:
6082 case OMPD_threadprivate:
6083 case OMPD_allocate:
6084 case OMPD_task:
6085 case OMPD_simd:
6086 case OMPD_tile:
6087 case OMPD_unroll:
6088 case OMPD_sections:
6089 case OMPD_section:
6090 case OMPD_single:
6091 case OMPD_master:
6092 case OMPD_critical:
6093 case OMPD_taskyield:
6094 case OMPD_barrier:
6095 case OMPD_taskwait:
6096 case OMPD_taskgroup:
6097 case OMPD_atomic:
6098 case OMPD_flush:
6099 case OMPD_depobj:
6100 case OMPD_scan:
6101 case OMPD_teams:
6102 case OMPD_target_data:
6103 case OMPD_target_exit_data:
6104 case OMPD_target_enter_data:
6105 case OMPD_distribute:
6106 case OMPD_distribute_simd:
6107 case OMPD_distribute_parallel_for:
6108 case OMPD_distribute_parallel_for_simd:
6109 case OMPD_teams_distribute:
6110 case OMPD_teams_distribute_simd:
6111 case OMPD_teams_distribute_parallel_for:
6112 case OMPD_teams_distribute_parallel_for_simd:
6113 case OMPD_target_update:
6114 case OMPD_declare_simd:
6115 case OMPD_declare_variant:
6116 case OMPD_begin_declare_variant:
6117 case OMPD_end_declare_variant:
6118 case OMPD_declare_target:
6119 case OMPD_end_declare_target:
6120 case OMPD_declare_reduction:
6121 case OMPD_declare_mapper:
6122 case OMPD_taskloop:
6123 case OMPD_taskloop_simd:
6124 case OMPD_master_taskloop:
6125 case OMPD_master_taskloop_simd:
6126 case OMPD_parallel_master_taskloop:
6127 case OMPD_parallel_master_taskloop_simd:
6128 case OMPD_requires:
6129 case OMPD_metadirective:
6130 case OMPD_unknown:
6131 break;
6132 default:
6133 break;
6134 }
6135 llvm_unreachable("Unexpected directive kind.");
6136}
6137
6139 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6140 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6141 "Clauses associated with the teams directive expected to be emitted "
6142 "only for the host!");
6143 CGBuilderTy &Bld = CGF.Builder;
6144 int32_t MinNT = -1, MaxNT = -1;
6145 const Expr *NumTeams =
6146 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6147 if (NumTeams != nullptr) {
6148 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6149
6150 switch (DirectiveKind) {
6151 case OMPD_target: {
6152 const auto *CS = D.getInnermostCapturedStmt();
6153 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6154 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6155 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6156 /*IgnoreResultAssign*/ true);
6157 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6158 /*isSigned=*/true);
6159 }
6160 case OMPD_target_teams:
6161 case OMPD_target_teams_distribute:
6162 case OMPD_target_teams_distribute_simd:
6163 case OMPD_target_teams_distribute_parallel_for:
6164 case OMPD_target_teams_distribute_parallel_for_simd: {
6165 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6166 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6167 /*IgnoreResultAssign*/ true);
6168 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6169 /*isSigned=*/true);
6170 }
6171 default:
6172 break;
6173 }
6174 }
6175
6176 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6177 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6178}
6179
6180/// Check for a num threads constant value (stored in \p DefaultVal), or
6181/// expression (stored in \p E). If the value is conditional (via an if-clause),
6182/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6183/// nullptr, no expression evaluation is perfomed.
6184static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6185 const Expr **E, int32_t &UpperBound,
6186 bool UpperBoundOnly, llvm::Value **CondVal) {
6188 CGF.getContext(), CS->getCapturedStmt());
6189 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6190 if (!Dir)
6191 return;
6192
6193 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6194 // Handle if clause. If if clause present, the number of threads is
6195 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6196 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6197 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6198 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6199 const OMPIfClause *IfClause = nullptr;
6200 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6201 if (C->getNameModifier() == OMPD_unknown ||
6202 C->getNameModifier() == OMPD_parallel) {
6203 IfClause = C;
6204 break;
6205 }
6206 }
6207 if (IfClause) {
6208 const Expr *CondExpr = IfClause->getCondition();
6209 bool Result;
6210 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6211 if (!Result) {
6212 UpperBound = 1;
6213 return;
6214 }
6215 } else {
6216 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6217 if (const auto *PreInit =
6218 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6219 for (const auto *I : PreInit->decls()) {
6220 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6221 CGF.EmitVarDecl(cast<VarDecl>(*I));
6222 } else {
6223 CodeGenFunction::AutoVarEmission Emission =
6224 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6225 CGF.EmitAutoVarCleanups(Emission);
6226 }
6227 }
6228 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6229 }
6230 }
6231 }
6232 }
6233 // Check the value of num_threads clause iff if clause was not specified
6234 // or is not evaluated to false.
6235 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6236 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6237 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6238 const auto *NumThreadsClause =
6239 Dir->getSingleClause<OMPNumThreadsClause>();
6240 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6241 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6242 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6243 UpperBound =
6244 UpperBound
6245 ? Constant->getZExtValue()
6246 : std::min(UpperBound,
6247 static_cast<int32_t>(Constant->getZExtValue()));
6248 // If we haven't found a upper bound, remember we saw a thread limiting
6249 // clause.
6250 if (UpperBound == -1)
6251 UpperBound = 0;
6252 if (!E)
6253 return;
6254 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6255 if (const auto *PreInit =
6256 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6257 for (const auto *I : PreInit->decls()) {
6258 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6259 CGF.EmitVarDecl(cast<VarDecl>(*I));
6260 } else {
6261 CodeGenFunction::AutoVarEmission Emission =
6262 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6263 CGF.EmitAutoVarCleanups(Emission);
6264 }
6265 }
6266 }
6267 *E = NTExpr;
6268 }
6269 return;
6270 }
6271 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6272 UpperBound = 1;
6273}
6274
6276 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6277 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6278 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6279 "Clauses associated with the teams directive expected to be emitted "
6280 "only for the host!");
6281 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6282 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6283 "Expected target-based executable directive.");
6284
6285 const Expr *NT = nullptr;
6286 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6287
6288 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6289 if (E->isIntegerConstantExpr(CGF.getContext())) {
6290 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6291 UpperBound = UpperBound ? Constant->getZExtValue()
6292 : std::min(UpperBound,
6293 int32_t(Constant->getZExtValue()));
6294 }
6295 // If we haven't found a upper bound, remember we saw a thread limiting
6296 // clause.
6297 if (UpperBound == -1)
6298 UpperBound = 0;
6299 if (EPtr)
6300 *EPtr = E;
6301 };
6302
6303 auto ReturnSequential = [&]() {
6304 UpperBound = 1;
6305 return NT;
6306 };
6307
6308 switch (DirectiveKind) {
6309 case OMPD_target: {
6310 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6311 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6313 CGF.getContext(), CS->getCapturedStmt());
6314 // TODO: The standard is not clear how to resolve two thread limit clauses,
6315 // let's pick the teams one if it's present, otherwise the target one.
6316 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6317 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6318 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6319 ThreadLimitClause = TLC;
6320 if (ThreadLimitExpr) {
6321 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6322 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6324 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6325 if (const auto *PreInit =
6326 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6327 for (const auto *I : PreInit->decls()) {
6328 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6329 CGF.EmitVarDecl(cast<VarDecl>(*I));
6330 } else {
6332 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6333 CGF.EmitAutoVarCleanups(Emission);
6334 }
6335 }
6336 }
6337 }
6338 }
6339 }
6340 if (ThreadLimitClause)
6341 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6342 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6343 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6344 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6345 CS = Dir->getInnermostCapturedStmt();
6347 CGF.getContext(), CS->getCapturedStmt());
6348 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6349 }
6350 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6351 CS = Dir->getInnermostCapturedStmt();
6352 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6353 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6354 return ReturnSequential();
6355 }
6356 return NT;
6357 }
6358 case OMPD_target_teams: {
6360 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6361 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6362 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6363 }
6364 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6365 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6367 CGF.getContext(), CS->getCapturedStmt());
6368 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6369 if (Dir->getDirectiveKind() == OMPD_distribute) {
6370 CS = Dir->getInnermostCapturedStmt();
6371 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6372 }
6373 }
6374 return NT;
6375 }
6376 case OMPD_target_teams_distribute:
6378 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6379 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6380 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6381 }
6382 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6383 UpperBoundOnly, CondVal);
6384 return NT;
6385 case OMPD_target_teams_loop:
6386 case OMPD_target_parallel_loop:
6387 case OMPD_target_parallel:
6388 case OMPD_target_parallel_for:
6389 case OMPD_target_parallel_for_simd:
6390 case OMPD_target_teams_distribute_parallel_for:
6391 case OMPD_target_teams_distribute_parallel_for_simd: {
6392 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6393 const OMPIfClause *IfClause = nullptr;
6394 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6395 if (C->getNameModifier() == OMPD_unknown ||
6396 C->getNameModifier() == OMPD_parallel) {
6397 IfClause = C;
6398 break;
6399 }
6400 }
6401 if (IfClause) {
6402 const Expr *Cond = IfClause->getCondition();
6403 bool Result;
6404 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6405 if (!Result)
6406 return ReturnSequential();
6407 } else {
6409 *CondVal = CGF.EvaluateExprAsBool(Cond);
6410 }
6411 }
6412 }
6414 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6415 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6416 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6417 }
6419 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6420 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6421 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6422 return NumThreadsClause->getNumThreads();
6423 }
6424 return NT;
6425 }
6426 case OMPD_target_teams_distribute_simd:
6427 case OMPD_target_simd:
6428 return ReturnSequential();
6429 default:
6430 break;
6431 }
6432 llvm_unreachable("Unsupported directive kind.");
6433}
6434
6436 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6437 llvm::Value *NumThreadsVal = nullptr;
6438 llvm::Value *CondVal = nullptr;
6439 llvm::Value *ThreadLimitVal = nullptr;
6440 const Expr *ThreadLimitExpr = nullptr;
6441 int32_t UpperBound = -1;
6442
6444 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6445 &ThreadLimitExpr);
6446
6447 // Thread limit expressions are used below, emit them.
6448 if (ThreadLimitExpr) {
6449 ThreadLimitVal =
6450 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6451 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6452 /*isSigned=*/false);
6453 }
6454
6455 // Generate the num teams expression.
6456 if (UpperBound == 1) {
6457 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6458 } else if (NT) {
6459 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6460 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6461 /*isSigned=*/false);
6462 } else if (ThreadLimitVal) {
6463 // If we do not have a num threads value but a thread limit, replace the
6464 // former with the latter. We know handled the thread limit expression.
6465 NumThreadsVal = ThreadLimitVal;
6466 ThreadLimitVal = nullptr;
6467 } else {
6468 // Default to "0" which means runtime choice.
6469 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6470 NumThreadsVal = CGF.Builder.getInt32(0);
6471 }
6472
6473 // Handle if clause. If if clause present, the number of threads is
6474 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6475 if (CondVal) {
6477 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6478 CGF.Builder.getInt32(1));
6479 }
6480
6481 // If the thread limit and num teams expression were present, take the
6482 // minimum.
6483 if (ThreadLimitVal) {
6484 NumThreadsVal = CGF.Builder.CreateSelect(
6485 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6486 ThreadLimitVal, NumThreadsVal);
6487 }
6488
6489 return NumThreadsVal;
6490}
6491
6492namespace {
6494
6495// Utility to handle information from clauses associated with a given
6496// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6497// It provides a convenient interface to obtain the information and generate
6498// code for that information.
6499class MappableExprsHandler {
6500public:
6501 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6502 static unsigned getFlagMemberOffset() {
6503 unsigned Offset = 0;
6504 for (uint64_t Remain =
6505 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6506 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6507 !(Remain & 1); Remain = Remain >> 1)
6508 Offset++;
6509 return Offset;
6510 }
6511
6512 /// Class that holds debugging information for a data mapping to be passed to
6513 /// the runtime library.
6514 class MappingExprInfo {
6515 /// The variable declaration used for the data mapping.
6516 const ValueDecl *MapDecl = nullptr;
6517 /// The original expression used in the map clause, or null if there is
6518 /// none.
6519 const Expr *MapExpr = nullptr;
6520
6521 public:
6522 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6523 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6524
6525 const ValueDecl *getMapDecl() const { return MapDecl; }
6526 const Expr *getMapExpr() const { return MapExpr; }
6527 };
6528
6529 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6530 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6531 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6532 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6533 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6534 using MapNonContiguousArrayTy =
6535 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6536 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6537 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6538
6539 /// This structure contains combined information generated for mappable
6540 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6541 /// mappers, and non-contiguous information.
6542 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6543 MapExprsArrayTy Exprs;
6544 MapValueDeclsArrayTy Mappers;
6545 MapValueDeclsArrayTy DevicePtrDecls;
6546
6547 /// Append arrays in \a CurInfo.
6548 void append(MapCombinedInfoTy &CurInfo) {
6549 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6550 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6551 CurInfo.DevicePtrDecls.end());
6552 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6553 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6554 }
6555 };
6556
6557 /// Map between a struct and the its lowest & highest elements which have been
6558 /// mapped.
6559 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6560 /// HE(FieldIndex, Pointer)}
6561 struct StructRangeInfoTy {
6562 MapCombinedInfoTy PreliminaryMapData;
6563 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6564 0, Address::invalid()};
6565 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6566 0, Address::invalid()};
6569 bool IsArraySection = false;
6570 bool HasCompleteRecord = false;
6571 };
6572
6573private:
6574 /// Kind that defines how a device pointer has to be returned.
6575 struct MapInfo {
6579 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6580 bool ReturnDevicePointer = false;
6581 bool IsImplicit = false;
6582 const ValueDecl *Mapper = nullptr;
6583 const Expr *VarRef = nullptr;
6584 bool ForDeviceAddr = false;
6585
6586 MapInfo() = default;
6587 MapInfo(
6589 OpenMPMapClauseKind MapType,
6591 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6592 bool ReturnDevicePointer, bool IsImplicit,
6593 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6594 bool ForDeviceAddr = false)
6595 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6596 MotionModifiers(MotionModifiers),
6597 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6598 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6599 };
6600
6601 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6602 /// member and there is no map information about it, then emission of that
6603 /// entry is deferred until the whole struct has been processed.
6604 struct DeferredDevicePtrEntryTy {
6605 const Expr *IE = nullptr;
6606 const ValueDecl *VD = nullptr;
6607 bool ForDeviceAddr = false;
6608
6609 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6610 bool ForDeviceAddr)
6611 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6612 };
6613
6614 /// The target directive from where the mappable clauses were extracted. It
6615 /// is either a executable directive or a user-defined mapper directive.
6616 llvm::PointerUnion<const OMPExecutableDirective *,
6617 const OMPDeclareMapperDecl *>
6618 CurDir;
6619
6620 /// Function the directive is being generated for.
6621 CodeGenFunction &CGF;
6622
6623 /// Set of all first private variables in the current directive.
6624 /// bool data is set to true if the variable is implicitly marked as
6625 /// firstprivate, false otherwise.
6626 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6627
6628 /// Map between device pointer declarations and their expression components.
6629 /// The key value for declarations in 'this' is null.
6630 llvm::DenseMap<
6631 const ValueDecl *,
6633 DevPointersMap;
6634
6635 /// Map between device addr declarations and their expression components.
6636 /// The key value for declarations in 'this' is null.
6637 llvm::DenseMap<
6638 const ValueDecl *,
6640 HasDevAddrsMap;
6641
6642 /// Map between lambda declarations and their map type.
6643 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6644
6645 llvm::Value *getExprTypeSize(const Expr *E) const {
6646 QualType ExprTy = E->getType().getCanonicalType();
6647
6648 // Calculate the size for array shaping expression.
6649 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6650 llvm::Value *Size =
6651 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6652 for (const Expr *SE : OAE->getDimensions()) {
6653 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6654 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6655 CGF.getContext().getSizeType(),
6656 SE->getExprLoc());
6657 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6658 }
6659 return Size;
6660 }
6661
6662 // Reference types are ignored for mapping purposes.
6663 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6664 ExprTy = RefTy->getPointeeType().getCanonicalType();
6665
6666 // Given that an array section is considered a built-in type, we need to
6667 // do the calculation based on the length of the section instead of relying
6668 // on CGF.getTypeSize(E->getType()).
6669 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6671 OAE->getBase()->IgnoreParenImpCasts())
6673
6674 // If there is no length associated with the expression and lower bound is
6675 // not specified too, that means we are using the whole length of the
6676 // base.
6677 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6678 !OAE->getLowerBound())
6679 return CGF.getTypeSize(BaseTy);
6680
6681 llvm::Value *ElemSize;
6682 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6683 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6684 } else {
6685 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6686 assert(ATy && "Expecting array type if not a pointer type.");
6687 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6688 }
6689
6690 // If we don't have a length at this point, that is because we have an
6691 // array section with a single element.
6692 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6693 return ElemSize;
6694
6695 if (const Expr *LenExpr = OAE->getLength()) {
6696 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6697 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6698 CGF.getContext().getSizeType(),
6699 LenExpr->getExprLoc());
6700 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6701 }
6702 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6703 OAE->getLowerBound() && "expected array_section[lb:].");
6704 // Size = sizetype - lb * elemtype;
6705 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6706 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6707 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6708 CGF.getContext().getSizeType(),
6709 OAE->getLowerBound()->getExprLoc());
6710 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6711 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6712 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6713 LengthVal = CGF.Builder.CreateSelect(
6714 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6715 return LengthVal;
6716 }
6717 return CGF.getTypeSize(ExprTy);
6718 }
6719
6720 /// Return the corresponding bits for a given map clause modifier. Add
6721 /// a flag marking the map as a pointer if requested. Add a flag marking the
6722 /// map as the first one of a series of maps that relate to the same map
6723 /// expression.
6724 OpenMPOffloadMappingFlags getMapTypeBits(
6726 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6727 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6728 OpenMPOffloadMappingFlags Bits =
6729 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6730 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6731 switch (MapType) {
6732 case OMPC_MAP_alloc:
6733 case OMPC_MAP_release:
6734 // alloc and release is the default behavior in the runtime library, i.e.
6735 // if we don't pass any bits alloc/release that is what the runtime is
6736 // going to do. Therefore, we don't need to signal anything for these two
6737 // type modifiers.
6738 break;
6739 case OMPC_MAP_to:
6740 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6741 break;
6742 case OMPC_MAP_from:
6743 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6744 break;
6745 case OMPC_MAP_tofrom:
6746 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6747 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6748 break;
6749 case OMPC_MAP_delete:
6750 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6751 break;
6752 case OMPC_MAP_unknown:
6753 llvm_unreachable("Unexpected map type!");
6754 }
6755 if (AddPtrFlag)
6756 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6757 if (AddIsTargetParamFlag)
6758 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6759 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6760 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6761 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6762 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6763 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6764 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6765 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6766 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6767 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6768 if (IsNonContiguous)
6769 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6770 return Bits;
6771 }
6772
6773 /// Return true if the provided expression is a final array section. A
6774 /// final array section, is one whose length can't be proved to be one.
6775 bool isFinalArraySectionExpression(const Expr *E) const {
6776 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6777
6778 // It is not an array section and therefore not a unity-size one.
6779 if (!OASE)
6780 return false;
6781
6782 // An array section with no colon always refer to a single element.
6783 if (OASE->getColonLocFirst().isInvalid())
6784 return false;
6785
6786 const Expr *Length = OASE->getLength();
6787
6788 // If we don't have a length we have to check if the array has size 1
6789 // for this dimension. Also, we should always expect a length if the
6790 // base type is pointer.
6791 if (!Length) {
6793 OASE->getBase()->IgnoreParenImpCasts())
6795 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6796 return ATy->getSExtSize() != 1;
6797 // If we don't have a constant dimension length, we have to consider
6798 // the current section as having any size, so it is not necessarily
6799 // unitary. If it happen to be unity size, that's user fault.
6800 return true;
6801 }
6802
6803 // Check if the length evaluates to 1.
6804 Expr::EvalResult Result;
6805 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6806 return true; // Can have more that size 1.
6807
6808 llvm::APSInt ConstLength = Result.Val.getInt();
6809 return ConstLength.getSExtValue() != 1;
6810 }
6811
6812 /// Generate the base pointers, section pointers, sizes, map type bits, and
6813 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6814 /// map type, map or motion modifiers, and expression components.
6815 /// \a IsFirstComponent should be set to true if the provided set of
6816 /// components is the first associated with a capture.
6817 void generateInfoForComponentList(
6819 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6821 MapCombinedInfoTy &CombinedInfo,
6822 MapCombinedInfoTy &StructBaseCombinedInfo,
6823 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6824 bool IsImplicit, bool GenerateAllInfoForClauses,
6825 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6826 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6828 OverlappedElements = std::nullopt,
6829 bool AreBothBasePtrAndPteeMapped = false) const {
6830 // The following summarizes what has to be generated for each map and the
6831 // types below. The generated information is expressed in this order:
6832 // base pointer, section pointer, size, flags
6833 // (to add to the ones that come from the map type and modifier).
6834 //
6835 // double d;
6836 // int i[100];
6837 // float *p;
6838 // int **a = &i;
6839 //
6840 // struct S1 {
6841 // int i;
6842 // float f[50];
6843 // }
6844 // struct S2 {
6845 // int i;
6846 // float f[50];
6847 // S1 s;
6848 // double *p;
6849 // struct S2 *ps;
6850 // int &ref;
6851 // }
6852 // S2 s;
6853 // S2 *ps;
6854 //
6855 // map(d)
6856 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6857 //
6858 // map(i)
6859 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6860 //
6861 // map(i[1:23])
6862 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6863 //
6864 // map(p)
6865 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6866 //
6867 // map(p[1:24])
6868 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6869 // in unified shared memory mode or for local pointers
6870 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6871 //
6872 // map((*a)[0:3])
6873 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6874 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6875 //
6876 // map(**a)
6877 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6878 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6879 //
6880 // map(s)
6881 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6882 //
6883 // map(s.i)
6884 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6885 //
6886 // map(s.s.f)
6887 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6888 //
6889 // map(s.p)
6890 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6891 //
6892 // map(to: s.p[:22])
6893 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6894 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6895 // &(s.p), &(s.p[0]), 22*sizeof(double),
6896 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6897 // (*) alloc space for struct members, only this is a target parameter
6898 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6899 // optimizes this entry out, same in the examples below)
6900 // (***) map the pointee (map: to)
6901 //
6902 // map(to: s.ref)
6903 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6904 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6905 // (*) alloc space for struct members, only this is a target parameter
6906 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6907 // optimizes this entry out, same in the examples below)
6908 // (***) map the pointee (map: to)
6909 //
6910 // map(s.ps)
6911 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6912 //
6913 // map(from: s.ps->s.i)
6914 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6915 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6916 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6917 //
6918 // map(to: s.ps->ps)
6919 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6920 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6921 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6922 //
6923 // map(s.ps->ps->ps)
6924 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6925 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6926 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6927 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6928 //
6929 // map(to: s.ps->ps->s.f[:22])
6930 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6931 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6932 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6933 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6934 //
6935 // map(ps)
6936 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6937 //
6938 // map(ps->i)
6939 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6940 //
6941 // map(ps->s.f)
6942 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6943 //
6944 // map(from: ps->p)
6945 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6946 //
6947 // map(to: ps->p[:22])
6948 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6949 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6950 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6951 //
6952 // map(ps->ps)
6953 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6954 //
6955 // map(from: ps->ps->s.i)
6956 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6957 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6958 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6959 //
6960 // map(from: ps->ps->ps)
6961 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6962 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6963 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6964 //
6965 // map(ps->ps->ps->ps)
6966 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6967 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6968 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6969 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6970 //
6971 // map(to: ps->ps->ps->s.f[:22])
6972 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6973 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6974 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6975 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6976 //
6977 // map(to: s.f[:22]) map(from: s.p[:33])
6978 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6979 // sizeof(double*) (**), TARGET_PARAM
6980 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6981 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6982 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6983 // (*) allocate contiguous space needed to fit all mapped members even if
6984 // we allocate space for members not mapped (in this example,
6985 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
6986 // them as well because they fall between &s.f[0] and &s.p)
6987 //
6988 // map(from: s.f[:22]) map(to: ps->p[:33])
6989 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6990 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6991 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6992 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6993 // (*) the struct this entry pertains to is the 2nd element in the list of
6994 // arguments, hence MEMBER_OF(2)
6995 //
6996 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6997 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
6998 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
6999 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7000 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7001 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7002 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7003 // (*) the struct this entry pertains to is the 4th element in the list
7004 // of arguments, hence MEMBER_OF(4)
7005 //
7006 // map(p, p[:100])
7007 // ===> map(p[:100])
7008 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7009
7010 // Track if the map information being generated is the first for a capture.
7011 bool IsCaptureFirstInfo = IsFirstComponentList;
7012 // When the variable is on a declare target link or in a to clause with
7013 // unified memory, a reference is needed to hold the host/device address
7014 // of the variable.
7015 bool RequiresReference = false;
7016
7017 // Scan the components from the base to the complete expression.
7018 auto CI = Components.rbegin();
7019 auto CE = Components.rend();
7020 auto I = CI;
7021
7022 // Track if the map information being generated is the first for a list of
7023 // components.
7024 bool IsExpressionFirstInfo = true;
7025 bool FirstPointerInComplexData = false;
7027 const Expr *AssocExpr = I->getAssociatedExpression();
7028 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7029 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7030 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7031
7032 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7033 return;
7034 if (isa<MemberExpr>(AssocExpr)) {
7035 // The base is the 'this' pointer. The content of the pointer is going
7036 // to be the base of the field being mapped.
7037 BP = CGF.LoadCXXThisAddress();
7038 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7039 (OASE &&
7040 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7041 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7042 } else if (OAShE &&
7043 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7044 BP = Address(
7045 CGF.EmitScalarExpr(OAShE->getBase()),
7046 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7047 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7048 } else {
7049 // The base is the reference to the variable.
7050 // BP = &Var.
7051 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7052 if (const auto *VD =
7053 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7054 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7055 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7056 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7057 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7058 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7060 RequiresReference = true;
7062 }
7063 }
7064 }
7065
7066 // If the variable is a pointer and is being dereferenced (i.e. is not
7067 // the last component), the base has to be the pointer itself, not its
7068 // reference. References are ignored for mapping purposes.
7069 QualType Ty =
7070 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7071 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7072 // No need to generate individual map information for the pointer, it
7073 // can be associated with the combined storage if shared memory mode is
7074 // active or the base declaration is not global variable.
7075 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7076 if (!AreBothBasePtrAndPteeMapped &&
7078 !VD || VD->hasLocalStorage()))
7079 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7080 else
7081 FirstPointerInComplexData = true;
7082 ++I;
7083 }
7084 }
7085
7086 // Track whether a component of the list should be marked as MEMBER_OF some
7087 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7088 // in a component list should be marked as MEMBER_OF, all subsequent entries
7089 // do not belong to the base struct. E.g.
7090 // struct S2 s;
7091 // s.ps->ps->ps->f[:]
7092 // (1) (2) (3) (4)
7093 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7094 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7095 // is the pointee of ps(2) which is not member of struct s, so it should not
7096 // be marked as such (it is still PTR_AND_OBJ).
7097 // The variable is initialized to false so that PTR_AND_OBJ entries which
7098 // are not struct members are not considered (e.g. array of pointers to
7099 // data).
7100 bool ShouldBeMemberOf = false;
7101
7102 // Variable keeping track of whether or not we have encountered a component
7103 // in the component list which is a member expression. Useful when we have a
7104 // pointer or a final array section, in which case it is the previous
7105 // component in the list which tells us whether we have a member expression.
7106 // E.g. X.f[:]
7107 // While processing the final array section "[:]" it is "f" which tells us
7108 // whether we are dealing with a member of a declared struct.
7109 const MemberExpr *EncounteredME = nullptr;
7110
7111 // Track for the total number of dimension. Start from one for the dummy
7112 // dimension.
7113 uint64_t DimSize = 1;
7114
7115 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7116 bool IsPrevMemberReference = false;
7117
7118 // We need to check if we will be encountering any MEs. If we do not
7119 // encounter any ME expression it means we will be mapping the whole struct.
7120 // In that case we need to skip adding an entry for the struct to the
7121 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7122 // list only when generating all info for clauses.
7123 bool IsMappingWholeStruct = true;
7124 if (!GenerateAllInfoForClauses) {
7125 IsMappingWholeStruct = false;
7126 } else {
7127 for (auto TempI = I; TempI != CE; ++TempI) {
7128 const MemberExpr *PossibleME =
7129 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7130 if (PossibleME) {
7131 IsMappingWholeStruct = false;
7132 break;
7133 }
7134 }
7135 }
7136
7137 for (; I != CE; ++I) {
7138 // If the current component is member of a struct (parent struct) mark it.
7139 if (!EncounteredME) {
7140 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7141 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7142 // as MEMBER_OF the parent struct.
7143 if (EncounteredME) {
7144 ShouldBeMemberOf = true;
7145 // Do not emit as complex pointer if this is actually not array-like
7146 // expression.
7147 if (FirstPointerInComplexData) {
7148 QualType Ty = std::prev(I)
7149 ->getAssociatedDeclaration()
7150 ->getType()
7151 .getNonReferenceType();
7152 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7153 FirstPointerInComplexData = false;
7154 }
7155 }
7156 }
7157
7158 auto Next = std::next(I);
7159
7160 // We need to generate the addresses and sizes if this is the last
7161 // component, if the component is a pointer or if it is an array section
7162 // whose length can't be proved to be one. If this is a pointer, it
7163 // becomes the base address for the following components.
7164
7165 // A final array section, is one whose length can't be proved to be one.
7166 // If the map item is non-contiguous then we don't treat any array section
7167 // as final array section.
7168 bool IsFinalArraySection =
7169 !IsNonContiguous &&
7170 isFinalArraySectionExpression(I->getAssociatedExpression());
7171
7172 // If we have a declaration for the mapping use that, otherwise use
7173 // the base declaration of the map clause.
7174 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7175 ? I->getAssociatedDeclaration()
7176 : BaseDecl;
7177 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7178 : MapExpr;
7179
7180 // Get information on whether the element is a pointer. Have to do a
7181 // special treatment for array sections given that they are built-in
7182 // types.
7183 const auto *OASE =
7184 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7185 const auto *OAShE =
7186 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7187 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7188 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7189 bool IsPointer =
7190 OAShE ||
7193 ->isAnyPointerType()) ||
7194 I->getAssociatedExpression()->getType()->isAnyPointerType();
7195 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7196 MapDecl &&
7197 MapDecl->getType()->isLValueReferenceType();
7198 bool IsNonDerefPointer = IsPointer &&
7199 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7200 !IsNonContiguous;
7201
7202 if (OASE)
7203 ++DimSize;
7204
7205 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7206 IsFinalArraySection) {
7207 // If this is not the last component, we expect the pointer to be
7208 // associated with an array expression or member expression.
7209 assert((Next == CE ||
7210 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7211 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7212 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7213 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7214 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7215 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7216 "Unexpected expression");
7217
7219 Address LowestElem = Address::invalid();
7220 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7221 const MemberExpr *E) {
7222 const Expr *BaseExpr = E->getBase();
7223 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7224 // scalar.
7225 LValue BaseLV;
7226 if (E->isArrow()) {
7227 LValueBaseInfo BaseInfo;
7228 TBAAAccessInfo TBAAInfo;
7229 Address Addr =
7230 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7231 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7232 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7233 } else {
7234 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7235 }
7236 return BaseLV;
7237 };
7238 if (OAShE) {
7239 LowestElem = LB =
7240 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7242 OAShE->getBase()->getType()->getPointeeType()),
7244 OAShE->getBase()->getType()));
7245 } else if (IsMemberReference) {
7246 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7247 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7248 LowestElem = CGF.EmitLValueForFieldInitialization(
7249 BaseLVal, cast<FieldDecl>(MapDecl))
7250 .getAddress();
7251 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7252 .getAddress();
7253 } else {
7254 LowestElem = LB =
7255 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7256 .getAddress();
7257 }
7258
7259 // If this component is a pointer inside the base struct then we don't
7260 // need to create any entry for it - it will be combined with the object
7261 // it is pointing to into a single PTR_AND_OBJ entry.
7262 bool IsMemberPointerOrAddr =
7263 EncounteredME &&
7264 (((IsPointer || ForDeviceAddr) &&
7265 I->getAssociatedExpression() == EncounteredME) ||
7266 (IsPrevMemberReference && !IsPointer) ||
7267 (IsMemberReference && Next != CE &&
7268 !Next->getAssociatedExpression()->getType()->isPointerType()));
7269 if (!OverlappedElements.empty() && Next == CE) {
7270 // Handle base element with the info for overlapped elements.
7271 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7272 assert(!IsPointer &&
7273 "Unexpected base element with the pointer type.");
7274 // Mark the whole struct as the struct that requires allocation on the
7275 // device.
7276 PartialStruct.LowestElem = {0, LowestElem};
7277 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7278 I->getAssociatedExpression()->getType());
7281 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7282 TypeSize.getQuantity() - 1);
7283 PartialStruct.HighestElem = {
7284 std::numeric_limits<decltype(
7285 PartialStruct.HighestElem.first)>::max(),
7286 HB};
7287 PartialStruct.Base = BP;
7288 PartialStruct.LB = LB;
7289 assert(
7290 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7291 "Overlapped elements must be used only once for the variable.");
7292 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7293 // Emit data for non-overlapped data.
7294 OpenMPOffloadMappingFlags Flags =
7295 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7296 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7297 /*AddPtrFlag=*/false,
7298 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7299 llvm::Value *Size = nullptr;
7300 // Do bitcopy of all non-overlapped structure elements.
7302 Component : OverlappedElements) {
7303 Address ComponentLB = Address::invalid();
7305 Component) {
7306 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7307 const auto *FD = dyn_cast<FieldDecl>(VD);
7308 if (FD && FD->getType()->isLValueReferenceType()) {
7309 const auto *ME =
7310 cast<MemberExpr>(MC.getAssociatedExpression());
7311 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7312 ComponentLB =
7313 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7314 .getAddress();
7315 } else {
7316 ComponentLB =
7317 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7318 .getAddress();
7319 }
7320 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7321 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7322 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7323 LBPtr);
7324 break;
7325 }
7326 }
7327 assert(Size && "Failed to determine structure size");
7328 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7329 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7330 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7331 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7332 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7333 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7334 Size, CGF.Int64Ty, /*isSigned=*/true));
7335 CombinedInfo.Types.push_back(Flags);
7336 CombinedInfo.Mappers.push_back(nullptr);
7337 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7338 : 1);
7339 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7340 }
7341 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7342 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7343 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7344 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7345 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7346 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7347 Size = CGF.Builder.CreatePtrDiff(
7348 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7349 LBPtr);
7350 CombinedInfo.Sizes.push_back(
7351 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7352 CombinedInfo.Types.push_back(Flags);
7353 CombinedInfo.Mappers.push_back(nullptr);
7354 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7355 : 1);
7356 break;
7357 }
7358 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7359 // Skip adding an entry in the CurInfo of this combined entry if the
7360 // whole struct is currently being mapped. The struct needs to be added
7361 // in the first position before any data internal to the struct is being
7362 // mapped.
7363 if (!IsMemberPointerOrAddr ||
7364 (Next == CE && MapType != OMPC_MAP_unknown)) {
7365 if (!IsMappingWholeStruct) {
7366 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7367 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7368 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7369 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7370 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7371 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7372 Size, CGF.Int64Ty, /*isSigned=*/true));
7373 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7374 : 1);
7375 } else {
7376 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7377 StructBaseCombinedInfo.BasePointers.push_back(
7378 BP.emitRawPointer(CGF));
7379 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7380 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7381 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7382 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7383 Size, CGF.Int64Ty, /*isSigned=*/true));
7384 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7385 IsNonContiguous ? DimSize : 1);
7386 }
7387
7388 // If Mapper is valid, the last component inherits the mapper.
7389 bool HasMapper = Mapper && Next == CE;
7390 if (!IsMappingWholeStruct)
7391 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7392 else
7393 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7394 : nullptr);
7395
7396 // We need to add a pointer flag for each map that comes from the
7397 // same expression except for the first one. We also need to signal
7398 // this map is the first one that relates with the current capture
7399 // (there is a set of entries for each capture).
7400 OpenMPOffloadMappingFlags Flags =
7401 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7402 !IsExpressionFirstInfo || RequiresReference ||
7403 FirstPointerInComplexData || IsMemberReference,
7404 AreBothBasePtrAndPteeMapped ||
7405 (IsCaptureFirstInfo && !RequiresReference),
7406 IsNonContiguous);
7407
7408 if (!IsExpressionFirstInfo || IsMemberReference) {
7409 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7410 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7411 if (IsPointer || (IsMemberReference && Next != CE))
7412 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7413 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7414 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7415 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7416 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7417
7418 if (ShouldBeMemberOf) {
7419 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7420 // should be later updated with the correct value of MEMBER_OF.
7421 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7422 // From now on, all subsequent PTR_AND_OBJ entries should not be
7423 // marked as MEMBER_OF.
7424 ShouldBeMemberOf = false;
7425 }
7426 }
7427
7428 if (!IsMappingWholeStruct)
7429 CombinedInfo.Types.push_back(Flags);
7430 else
7431 StructBaseCombinedInfo.Types.push_back(Flags);
7432 }
7433
7434 // If we have encountered a member expression so far, keep track of the
7435 // mapped member. If the parent is "*this", then the value declaration
7436 // is nullptr.
7437 if (EncounteredME) {
7438 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7439 unsigned FieldIndex = FD->getFieldIndex();
7440
7441 // Update info about the lowest and highest elements for this struct
7442 if (!PartialStruct.Base.isValid()) {
7443 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7444 if (IsFinalArraySection) {
7445 Address HB =
7446 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7447 .getAddress();
7448 PartialStruct.HighestElem = {FieldIndex, HB};
7449 } else {
7450 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7451 }
7452 PartialStruct.Base = BP;
7453 PartialStruct.LB = BP;
7454 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7455 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7456 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7457 if (IsFinalArraySection) {
7458 Address HB =
7459 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7460 .getAddress();
7461 PartialStruct.HighestElem = {FieldIndex, HB};
7462 } else {
7463 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7464 }
7465 }
7466 }
7467
7468 // Need to emit combined struct for array sections.
7469 if (IsFinalArraySection || IsNonContiguous)
7470 PartialStruct.IsArraySection = true;
7471
7472 // If we have a final array section, we are done with this expression.
7473 if (IsFinalArraySection)
7474 break;
7475
7476 // The pointer becomes the base for the next element.
7477 if (Next != CE)
7478 BP = IsMemberReference ? LowestElem : LB;
7479
7480 IsExpressionFirstInfo = false;
7481 IsCaptureFirstInfo = false;
7482 FirstPointerInComplexData = false;
7483 IsPrevMemberReference = IsMemberReference;
7484 } else if (FirstPointerInComplexData) {
7485 QualType Ty = Components.rbegin()
7486 ->getAssociatedDeclaration()
7487 ->getType()
7488 .getNonReferenceType();
7489 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7490 FirstPointerInComplexData = false;
7491 }
7492 }
7493 // If we ran into the whole component, allocate space for the whole
7494 // record.
7495 if (!EncounteredME)
7496 PartialStruct.HasCompleteRecord = true;
7497
7498 if (!IsNonContiguous)
7499 return;
7500
7501 const ASTContext &Context = CGF.getContext();
7502
7503 // To support strides in array sections, we need to initialize the first
7504 // dimension size as 1, the first offset as 0, and the first count as 1.
7505 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7506 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7507 MapValuesArrayTy CurStrides;
7508 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7509 uint64_t ElementTypeSize;
7510
7511 // Collect Size information for each dimension and get the element size as
7512 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7513 // should be [10, 10] and the first stride is 4 bytes.
7514 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7515 Components) {
7516 const Expr *AssocExpr = Component.getAssociatedExpression();
7517 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7518
7519 if (!OASE)
7520 continue;
7521
7522 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7523 auto *CAT = Context.getAsConstantArrayType(Ty);
7524 auto *VAT = Context.getAsVariableArrayType(Ty);
7525
7526 // We need all the dimension sizes except for the last dimension.
7527 assert((VAT || CAT || &Component == &*Components.begin()) &&
7528 "Should be either ConstantArray or VariableArray if not the "
7529 "first Component");
7530
7531 // Get element size if CurStrides is empty.
7532 if (CurStrides.empty()) {
7533 const Type *ElementType = nullptr;
7534 if (CAT)
7535 ElementType = CAT->getElementType().getTypePtr();
7536 else if (VAT)
7537 ElementType = VAT->getElementType().getTypePtr();
7538 else
7539 assert(&Component == &*Components.begin() &&
7540 "Only expect pointer (non CAT or VAT) when this is the "
7541 "first Component");
7542 // If ElementType is null, then it means the base is a pointer
7543 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7544 // for next iteration.
7545 if (ElementType) {
7546 // For the case that having pointer as base, we need to remove one
7547 // level of indirection.
7548 if (&Component != &*Components.begin())
7549 ElementType = ElementType->getPointeeOrArrayElementType();
7550 ElementTypeSize =
7551 Context.getTypeSizeInChars(ElementType).getQuantity();
7552 CurStrides.push_back(
7553 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7554 }
7555 }
7556 // Get the dimension value, except for the last dimension, since we don't
7557 // need it.
7558 if (DimSizes.size() < Components.size() - 1) {
7559 if (CAT)
7560 DimSizes.push_back(
7561 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7562 else if (VAT)
7563 DimSizes.push_back(CGF.Builder.CreateIntCast(
7564 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7565 /*IsSigned=*/false));
7566 }
7567 }
7568
7569 // Skip the dummy dimension since we already have its information.
7570 auto *DI = DimSizes.begin() + 1;
7571 // Running product of the dimension sizes.
7572 llvm::Value *DimProd =
7573 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7574
7575 // Collect info for non-contiguous. Note that offset, count, and stride
7576 // are only meaningful for array sections, so we insert a null for anything
7577 // other than an array section.
7578 // Also, the sizes of offset, count, and stride are not the same as those
7579 // of pointers, base_pointers, sizes, or dims. Instead, they equal the
7580 // number of non-contiguous declarations in the target update to/from
7581 // clause.
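// Illustrative example (not in the original source): for a clause such as
//   #pragma omp target update to(arr[0:2:2][1:2:1])
// each array-section dimension of the single non-contiguous list item
// contributes one (offset, count, stride) triple to the vectors built below.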
7582 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7583 Components) {
7584 const Expr *AssocExpr = Component.getAssociatedExpression();
7585
7586 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7587 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7588 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7589 /*isSigned=*/false);
7590 CurOffsets.push_back(Offset);
7591 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7592 CurStrides.push_back(CurStrides.back());
7593 continue;
7594 }
7595
7596 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7597
7598 if (!OASE)
7599 continue;
7600
7601 // Offset
7602 const Expr *OffsetExpr = OASE->getLowerBound();
7603 llvm::Value *Offset = nullptr;
7604 if (!OffsetExpr) {
7605 // If offset is absent, then we just set it to zero.
7606 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7607 } else {
7608 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7609 CGF.Int64Ty,
7610 /*isSigned=*/false);
7611 }
7612 CurOffsets.push_back(Offset);
7613
7614 // Count
7615 const Expr *CountExpr = OASE->getLength();
7616 llvm::Value *Count = nullptr;
7617 if (!CountExpr) {
7618 // In Clang, once a higher dimension is an array section, all lower
7619 // dimensions are constructed as array sections too. However, for a case
7620 // like arr[0:2][2], Clang constructs the inner dimension as an array
7621 // section even though it is not one according to the spec.
7622 if (!OASE->getColonLocFirst().isValid() &&
7623 !OASE->getColonLocSecond().isValid()) {
7624 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7625 } else {
7626 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7627 // When the length is absent it defaults to ⌈(size −
7628 // lower-bound)/stride⌉, where size is the size of the array
7629 // dimension.
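// E.g., for 'int a[10]' and the section 'a[2::2]': size is 10, the lower
// bound is 2, and the stride is 2, so the length defaults to
// ceil((10 - 2) / 2) = 4, covering elements 2, 4, 6, and 8.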
7630 const Expr *StrideExpr = OASE->getStride();
7631 llvm::Value *Stride =
7632 StrideExpr
7633 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7634 CGF.Int64Ty, /*isSigned=*/false)
7635 : nullptr;
7636 if (Stride)
7637 Count = CGF.Builder.CreateUDiv(
7638 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7639 else
7640 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7641 }
7642 } else {
7643 Count = CGF.EmitScalarExpr(CountExpr);
7644 }
7645 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7646 CurCounts.push_back(Count);
7647
7648 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7649 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7650 // Offset Count Stride
7651 // D0 0 1 4 (int) <- dummy dimension
7652 // D1 0 2 8 (2 * (1) * 4)
7653 // D2 1 2 20 (1 * (1 * 5) * 4)
7654 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7655 const Expr *StrideExpr = OASE->getStride();
7656 llvm::Value *Stride =
7657 StrideExpr
7658 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7659 CGF.Int64Ty, /*isSigned=*/false)
7660 : nullptr;
7661 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7662 if (Stride)
7663 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7664 else
7665 CurStrides.push_back(DimProd);
7666 if (DI != DimSizes.end())
7667 ++DI;
7668 }
7669
7670 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7671 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7672 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7673 }
7674
7675 /// Return the adjusted map modifiers if the declaration a capture refers to
7676 /// appears in a first-private clause. This is expected to be used only with
7677 /// directives that start with 'target'.
7678 OpenMPOffloadMappingFlags
7679 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7680 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7681
7682 // A firstprivate variable captured by reference will use only the
7683 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7684 // declaration is known as firstprivate in this handler.
7685 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7686 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7687 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7688 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7689 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7690 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7691 }
7692 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7693 if (I != LambdasMap.end())
7694 // for map(to: lambda): using user specified map type.
7695 return getMapTypeBits(
7696 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7697 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7698 /*AddPtrFlag=*/false,
7699 /*AddIsTargetParamFlag=*/false,
7700 /*isNonContiguous=*/false);
7701 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7702 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7703 }
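// Illustration (not in the original source): for
//   #pragma omp target firstprivate(buf, p)
// where 'buf' is an array and 'p' is a pointer, the branches above yield
// PRIVATE | TO for 'buf' and TO | PTR_AND_OBJ for 'p'.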
7704
7705 void getPlainLayout(const CXXRecordDecl *RD,
7706 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7707 bool AsBase) const {
7708 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7709
7710 llvm::StructType *St =
7711 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7712
7713 unsigned NumElements = St->getNumElements();
7714 llvm::SmallVector<
7715 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7716 RecordLayout(NumElements);
7717
7718 // Fill bases.
7719 for (const auto &I : RD->bases()) {
7720 if (I.isVirtual())
7721 continue;
7722 const auto *Base = I.getType()->getAsCXXRecordDecl();
7723 // Ignore empty bases.
7724 if (Base->isEmpty() || CGF.getContext()
7725 .getASTRecordLayout(RD)
7726 .getBaseClassOffset(Base)
7727 .isZero())
7728 continue;
7729
7730 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7731 RecordLayout[FieldIndex] = Base;
7732 }
7733 // Fill in virtual bases.
7734 for (const auto &I : RD->vbases()) {
7735 const auto *Base = I.getType()->getAsCXXRecordDecl();
7736 // Ignore empty bases.
7737 if (Base->isEmpty())
7738 continue;
7739 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7740 if (RecordLayout[FieldIndex])
7741 continue;
7742 RecordLayout[FieldIndex] = Base;
7743 }
7744 // Fill in all the fields.
7745 assert(!RD->isUnion() && "Unexpected union.");
7746 for (const auto *Field : RD->fields()) {
7747 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7748 // will fill in later.)
7749 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7750 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7751 RecordLayout[FieldIndex] = Field;
7752 }
7753 }
7754 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7755 &Data : RecordLayout) {
7756 if (Data.isNull())
7757 continue;
7758 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7759 getPlainLayout(Base, Layout, /*AsBase=*/true);
7760 else
7761 Layout.push_back(Data.get<const FieldDecl *>());
7762 }
7763 }
7764
7765 /// Generate all the base pointers, section pointers, sizes, map types, and
7766 /// mappers for the extracted mappable expressions (all included in \a
7767 /// CombinedInfo). Also, for each item that relates with a device pointer, a
7768 /// pair of the relevant declaration and index where it occurs is appended to
7769 /// the device pointers info array.
7770 void generateAllInfoForClauses(
7771 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7772 llvm::OpenMPIRBuilder &OMPBuilder,
7773 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7774 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7775 // We have to process the component lists that relate to the same
7776 // declaration in a single chunk so that we can generate the map flags
7777 // correctly. Therefore, we organize all lists in a map.
7778 enum MapKind { Present, Allocs, Other, Total };
7779 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7780 SmallVector<SmallVector<MapInfo, 8>, 4>>
7781 Info;
7782
7783 // Helper function to fill the information map for the different supported
7784 // clauses.
7785 auto &&InfoGen =
7786 [&Info, &SkipVarSet](
7787 const ValueDecl *D, MapKind Kind,
7788 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7789 OpenMPMapClauseKind MapType,
7790 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7791 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7792 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7793 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7794 if (SkipVarSet.contains(D))
7795 return;
7796 auto It = Info.find(D);
7797 if (It == Info.end())
7798 It = Info
7799 .insert(std::make_pair(
7800 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7801 .first;
7802 It->second[Kind].emplace_back(
7803 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7804 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7805 };
7806
7807 for (const auto *Cl : Clauses) {
7808 const auto *C = dyn_cast<OMPMapClause>(Cl);
7809 if (!C)
7810 continue;
7811 MapKind Kind = Other;
7812 if (llvm::is_contained(C->getMapTypeModifiers(),
7813 OMPC_MAP_MODIFIER_present))
7814 Kind = Present;
7815 else if (C->getMapType() == OMPC_MAP_alloc)
7816 Kind = Allocs;
7817 const auto *EI = C->getVarRefs().begin();
7818 for (const auto L : C->component_lists()) {
7819 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7820 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7821 C->getMapTypeModifiers(), std::nullopt,
7822 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7823 E);
7824 ++EI;
7825 }
7826 }
7827 for (const auto *Cl : Clauses) {
7828 const auto *C = dyn_cast<OMPToClause>(Cl);
7829 if (!C)
7830 continue;
7831 MapKind Kind = Other;
7832 if (llvm::is_contained(C->getMotionModifiers(),
7833 OMPC_MOTION_MODIFIER_present))
7834 Kind = Present;
7835 const auto *EI = C->getVarRefs().begin();
7836 for (const auto L : C->component_lists()) {
7837 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7838 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7839 C->isImplicit(), std::get<2>(L), *EI);
7840 ++EI;
7841 }
7842 }
7843 for (const auto *Cl : Clauses) {
7844 const auto *C = dyn_cast<OMPFromClause>(Cl);
7845 if (!C)
7846 continue;
7847 MapKind Kind = Other;
7848 if (llvm::is_contained(C->getMotionModifiers(),
7849 OMPC_MOTION_MODIFIER_present))
7850 Kind = Present;
7851 const auto *EI = C->getVarRefs().begin();
7852 for (const auto L : C->component_lists()) {
7853 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7854 std::nullopt, C->getMotionModifiers(),
7855 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7856 *EI);
7857 ++EI;
7858 }
7859 }
7860
7861 // Look at the use_device_ptr and use_device_addr clauses information and
7862 // mark the existing map entries as such. If there is no map information for
7863 // an entry in the use_device_ptr and use_device_addr list, we create one
7864 // with map type 'alloc' and zero size section. It is the user's fault if that
7865 // was not mapped before. If there is no map information and the pointer is
7866 // a struct member, then we defer the emission of that entry until the whole
7867 // struct has been processed.
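// Illustration (not in the original source): for
//   #pragma omp target data map(s.p[0:10]) use_device_ptr(s.p)
// the pointer 's.p' is a struct member, so its RETURN_PARAM entry is parked
// in DeferredInfo and only emitted after the entry for the enclosing struct
// 's' has been generated.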
7868 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7869 SmallVector<DeferredDevicePtrEntryTy, 4>>
7870 DeferredInfo;
7871 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7872
7873 auto &&UseDeviceDataCombinedInfoGen =
7874 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7875 CodeGenFunction &CGF, bool IsDevAddr) {
7876 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7877 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7878 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7879 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7880 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7881 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7882 UseDeviceDataCombinedInfo.Sizes.push_back(
7883 llvm::Constant::getNullValue(CGF.Int64Ty));
7884 UseDeviceDataCombinedInfo.Types.push_back(
7885 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7886 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7887 };
7888
7889 auto &&MapInfoGen =
7890 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7891 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7892 OMPClauseMappableExprCommon::MappableExprComponentListRef
7893 Components,
7894 bool IsImplicit, bool IsDevAddr) {
7895 // We didn't find any match in our map information - generate a zero
7896 // size array section - if the pointer is a struct member we defer
7897 // this action until the whole struct has been processed.
7898 if (isa<MemberExpr>(IE)) {
7899 // Insert the pointer into Info to be processed by
7900 // generateInfoForComponentList. Because it is a member pointer
7901 // without a pointee, no entry will be generated for it, therefore
7902 // we need to generate one after the whole struct has been
7903 // processed. Nonetheless, generateInfoForComponentList must be
7904 // called to take the pointer into account for the calculation of
7905 // the range of the partial struct.
7906 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7907 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7908 nullptr, nullptr, IsDevAddr);
7909 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7910 } else {
7911 llvm::Value *Ptr;
7912 if (IsDevAddr) {
7913 if (IE->isGLValue())
7914 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7915 else
7916 Ptr = CGF.EmitScalarExpr(IE);
7917 } else {
7918 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7919 }
7920 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7921 }
7922 };
7923
7924 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7925 const Expr *IE, bool IsDevAddr) -> bool {
7926 // We potentially have map information for this declaration already.
7927 // Look for the first set of components that refer to it. If found,
7928 // return true.
7929 // If the first component is a member expression, we have to look into
7930 // 'this', which maps to null in the map of map information. Otherwise
7931 // look directly for the information.
7932 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7933 if (It != Info.end()) {
7934 bool Found = false;
7935 for (auto &Data : It->second) {
7936 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7937 return MI.Components.back().getAssociatedDeclaration() == VD;
7938 });
7939 // If we found a map entry, signal that the pointer has to be
7940 // returned and move on to the next declaration. Exclude cases where
7941 // the base pointer is mapped as array subscript, array section or
7942 // array shaping. The base address is passed as a pointer to base in
7943 // this case and cannot be used as a base for use_device_ptr list
7944 // item.
7945 if (CI != Data.end()) {
7946 if (IsDevAddr) {
7947 CI->ForDeviceAddr = IsDevAddr;
7948 CI->ReturnDevicePointer = true;
7949 Found = true;
7950 break;
7951 } else {
7952 auto PrevCI = std::next(CI->Components.rbegin());
7953 const auto *VarD = dyn_cast<VarDecl>(VD);
7954 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7955 isa<MemberExpr>(IE) ||
7956 !VD->getType().getNonReferenceType()->isPointerType() ||
7957 PrevCI == CI->Components.rend() ||
7958 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7959 VarD->hasLocalStorage()) {
7960 CI->ForDeviceAddr = IsDevAddr;
7961 CI->ReturnDevicePointer = true;
7962 Found = true;
7963 break;
7964 }
7965 }
7966 }
7967 }
7968 return Found;
7969 }
7970 return false;
7971 };
7972
7973 // Look at the use_device_ptr clause information and mark the existing map
7974 // entries as such. If there is no map information for an entry in the
7975 // use_device_ptr list, we create one with map type 'alloc' and zero size
7976 // section. It is the user's fault if that was not mapped before. If there is
7977 // no map information and the pointer is a struct member, then we defer the
7978 // emission of that entry until the whole struct has been processed.
7979 for (const auto *Cl : Clauses) {
7980 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7981 if (!C)
7982 continue;
7983 for (const auto L : C->component_lists()) {
7984 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7985 std::get<1>(L);
7986 assert(!Components.empty() &&
7987 "Not expecting empty list of components!");
7988 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7989 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7990 const Expr *IE = Components.back().getAssociatedExpression();
7991 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
7992 continue;
7993 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
7994 /*IsDevAddr=*/false);
7995 }
7996 }
7997
7998 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7999 for (const auto *Cl : Clauses) {
8000 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8001 if (!C)
8002 continue;
8003 for (const auto L : C->component_lists()) {
8004 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8005 std::get<1>(L);
8006 assert(!std::get<1>(L).empty() &&
8007 "Not expecting empty list of components!");
8008 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8009 if (!Processed.insert(VD).second)
8010 continue;
8011 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8012 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8013 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8014 continue;
8015 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8016 /*IsDevAddr=*/true);
8017 }
8018 }
8019
8020 for (const auto &Data : Info) {
8021 StructRangeInfoTy PartialStruct;
8022 // Current struct information:
8023 MapCombinedInfoTy CurInfo;
8024 // Current struct base information:
8025 MapCombinedInfoTy StructBaseCurInfo;
8026 const Decl *D = Data.first;
8027 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8028 for (const auto &M : Data.second) {
8029 for (const MapInfo &L : M) {
8030 assert(!L.Components.empty() &&
8031 "Not expecting declaration with no component lists.");
8032
8033 // Remember the current base pointer index.
8034 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8035 unsigned StructBasePointersIdx =
8036 StructBaseCurInfo.BasePointers.size();
8037 CurInfo.NonContigInfo.IsNonContiguous =
8038 L.Components.back().isNonContiguous();
8039 generateInfoForComponentList(
8040 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8041 CurInfo, StructBaseCurInfo, PartialStruct,
8042 /*IsFirstComponentList=*/false, L.IsImplicit,
8043 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8044 L.VarRef);
8045
8046 // If this entry relates to a device pointer, set the relevant
8047 // declaration and add the 'return pointer' flag.
8048 if (L.ReturnDevicePointer) {
8049 // Check whether a value was added to either CurInfo or
8050 // StructBaseCurInfo and error if no value was added to either of
8051 // them:
8052 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8053 StructBasePointersIdx <
8054 StructBaseCurInfo.BasePointers.size()) &&
8055 "Unexpected number of mapped base pointers.");
8056
8057 // Choose a base pointer index which is always valid:
8058 const ValueDecl *RelevantVD =
8059 L.Components.back().getAssociatedDeclaration();
8060 assert(RelevantVD &&
8061 "No relevant declaration related with device pointer??");
8062
8063 // If StructBaseCurInfo has been updated this iteration then work on
8064 // the first new entry added to it i.e. make sure that when multiple
8065 // values are added to any of the lists, the first value added is
8066 // being modified by the assignments below (not the last value
8067 // added).
8068 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8069 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8070 RelevantVD;
8071 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8072 L.ForDeviceAddr ? DeviceInfoTy::Address
8073 : DeviceInfoTy::Pointer;
8074 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8075 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8076 } else {
8077 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8078 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8079 L.ForDeviceAddr ? DeviceInfoTy::Address
8080 : DeviceInfoTy::Pointer;
8081 CurInfo.Types[CurrentBasePointersIdx] |=
8082 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8083 }
8084 }
8085 }
8086 }
8087
8088 // Append any pending zero-length pointers which are struct members and
8089 // used with use_device_ptr or use_device_addr.
8090 auto CI = DeferredInfo.find(Data.first);
8091 if (CI != DeferredInfo.end()) {
8092 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8093 llvm::Value *BasePtr;
8094 llvm::Value *Ptr;
8095 if (L.ForDeviceAddr) {
8096 if (L.IE->isGLValue())
8097 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8098 else
8099 Ptr = this->CGF.EmitScalarExpr(L.IE);
8100 BasePtr = Ptr;
8101 // Entry is RETURN_PARAM. Also, set the placeholder value
8102 // MEMBER_OF=FFFF so that the entry is later updated with the
8103 // correct value of MEMBER_OF.
8104 CurInfo.Types.push_back(
8105 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8106 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8107 } else {
8108 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8109 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8110 L.IE->getExprLoc());
8111 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8112 // placeholder value MEMBER_OF=FFFF so that the entry is later
8113 // updated with the correct value of MEMBER_OF.
8114 CurInfo.Types.push_back(
8115 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8116 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8117 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8118 }
8119 CurInfo.Exprs.push_back(L.VD);
8120 CurInfo.BasePointers.emplace_back(BasePtr);
8121 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8122 CurInfo.DevicePointers.emplace_back(
8123 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8124 CurInfo.Pointers.push_back(Ptr);
8125 CurInfo.Sizes.push_back(
8126 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8127 CurInfo.Mappers.push_back(nullptr);
8128 }
8129 }
8130
8131 // Unify entries in one list making sure the struct mapping precedes the
8132 // individual fields:
8133 MapCombinedInfoTy UnionCurInfo;
8134 UnionCurInfo.append(StructBaseCurInfo);
8135 UnionCurInfo.append(CurInfo);
8136
8137 // If there is an entry in PartialStruct it means we have a struct with
8138 // individual members mapped. Emit an extra combined entry.
8139 if (PartialStruct.Base.isValid()) {
8140 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8141 // Emit a combined entry:
8142 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8143 /*IsMapThis*/ !VD, OMPBuilder, VD);
8144 }
8145
8146 // We need to append the results of this capture to what we already have.
8147 CombinedInfo.append(UnionCurInfo);
8148 }
8149 // Append data for use_device_ptr clauses.
8150 CombinedInfo.append(UseDeviceDataCombinedInfo);
8151 }
8152
8153public:
8154 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8155 : CurDir(&Dir), CGF(CGF) {
8156 // Extract firstprivate clause information.
8157 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8158 for (const auto *D : C->varlists())
8159 FirstPrivateDecls.try_emplace(
8160 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8161 // Extract implicit firstprivates from uses_allocators clauses.
8162 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8163 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8164 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8165 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8166 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8167 /*Implicit=*/true);
8168 else if (const auto *VD = dyn_cast<VarDecl>(
8169 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8170 ->getDecl()))
8171 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8172 }
8173 }
8174 // Extract device pointer clause information.
8175 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8176 for (auto L : C->component_lists())
8177 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8178 // Extract device addr clause information.
8179 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8180 for (auto L : C->component_lists())
8181 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8182 // Extract map information.
8183 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8184 if (C->getMapType() != OMPC_MAP_to)
8185 continue;
8186 for (auto L : C->component_lists()) {
8187 const ValueDecl *VD = std::get<0>(L);
8188 const auto *RD = VD ? VD->getType()
8189 .getCanonicalType()
8190 .getNonReferenceType()
8191 ->getAsCXXRecordDecl()
8192 : nullptr;
8193 if (RD && RD->isLambda())
8194 LambdasMap.try_emplace(std::get<0>(L), C);
8195 }
8196 }
8197 }
8198
8199 /// Constructor for the declare mapper directive.
8200 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8201 : CurDir(&Dir), CGF(CGF) {}
8202
8203 /// Generate code for the combined entry if we have a partially mapped struct
8204 /// and take care of the mapping flags of the arguments corresponding to
8205 /// individual struct members.
8206 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8207 MapFlagsArrayTy &CurTypes,
8208 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8209 llvm::OpenMPIRBuilder &OMPBuilder,
8210 const ValueDecl *VD = nullptr,
8211 bool NotTargetParams = true) const {
8212 if (CurTypes.size() == 1 &&
8213 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8214 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8215 !PartialStruct.IsArraySection)
8216 return;
8217 Address LBAddr = PartialStruct.LowestElem.second;
8218 Address HBAddr = PartialStruct.HighestElem.second;
8219 if (PartialStruct.HasCompleteRecord) {
8220 LBAddr = PartialStruct.LB;
8221 HBAddr = PartialStruct.LB;
8222 }
8223 CombinedInfo.Exprs.push_back(VD);
8224 // Base is the base of the struct
8225 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8226 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8227 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8228 // Pointer is the address of the lowest element
8229 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8230 const CXXMethodDecl *MD =
8231 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8232 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8233 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8234 // There should not be a mapper for a combined entry.
8235 if (HasBaseClass) {
8236 // OpenMP 5.2 148:21:
8237 // If the target construct is within a class non-static member function,
8238 // and a variable is an accessible data member of the object for which the
8239 // non-static data member function is invoked, the variable is treated as
8240 // if the this[:1] expression had appeared in a map clause with a map-type
8241 // of tofrom.
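// Illustration (not in the original source): inside a non-static member
// function
//   void S::run() {
//   #pragma omp target
//     ++member;
//   }
// the data member access is treated as if map(tofrom: this[:1]) had been
// written on the construct.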
8242 // Emit this[:1]
8243 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8244 QualType Ty = MD->getFunctionObjectParameterType();
8245 llvm::Value *Size =
8246 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8247 /*isSigned=*/true);
8248 CombinedInfo.Sizes.push_back(Size);
8249 } else {
8250 CombinedInfo.Pointers.push_back(LB);
8251 // Size is (addr of {highest+1} element) - (addr of lowest element)
8252 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8253 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8254 HBAddr.getElementType(), HB, /*Idx0=*/1);
8255 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8256 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8257 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8258 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8259 /*isSigned=*/false);
8260 CombinedInfo.Sizes.push_back(Size);
8261 }
8262 CombinedInfo.Mappers.push_back(nullptr);
8263 // Map type is always TARGET_PARAM if we are generating info for captures.
8264 CombinedInfo.Types.push_back(
8265 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8266 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8267 // If any element has the present modifier, then make sure the runtime
8268 // doesn't attempt to allocate the struct.
8269 if (CurTypes.end() !=
8270 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8271 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8272 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8273 }))
8274 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8275 // Remove TARGET_PARAM flag from the first element
8276 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8277 // If any element has the ompx_hold modifier, then make sure the runtime
8278 // uses the hold reference count for the struct as a whole so that it won't
8279 // be unmapped by an extra dynamic reference count decrement. Add it to all
8280 // elements as well so the runtime knows which reference count to check
8281 // when determining whether it's time for device-to-host transfers of
8282 // individual elements.
8283 if (CurTypes.end() !=
8284 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8285 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8286 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8287 })) {
8288 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8289 for (auto &M : CurTypes)
8290 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8291 }
8292
8293 // All other current entries will be MEMBER_OF the combined entry
8294 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8295 // 0xFFFF in the MEMBER_OF field).
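// E.g., if the combined entry lands at zero-based index 2 of the argument
// arrays, getMemberOfFlag presumably encodes index + 1 = 3 into the
// MEMBER_OF bits, replacing the 0xFFFF placeholder set earlier.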
8296 OpenMPOffloadMappingFlags MemberOfFlag =
8297 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8298 for (auto &M : CurTypes)
8299 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8300 }
8301
8302 /// Generate all the base pointers, section pointers, sizes, map types, and
8303 /// mappers for the extracted mappable expressions (all included in \a
8304 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8305 /// pair of the relevant declaration and index where it occurs is appended to
8306 /// the device pointers info array.
8307 void generateAllInfo(
8308 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8309 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8310 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8311 assert(CurDir.is<const OMPExecutableDirective *>() &&
8312 "Expect an executable directive");
8313 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8314 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8315 SkipVarSet);
8316 }
8317
8318 /// Generate all the base pointers, section pointers, sizes, map types, and
8319 /// mappers for the extracted map clauses of user-defined mapper (all included
8320 /// in \a CombinedInfo).
8321 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8322 llvm::OpenMPIRBuilder &OMPBuilder) const {
8323 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8324 "Expect a declare mapper directive");
8325 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8326 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8327 OMPBuilder);
8328 }
8329
8330 /// Emit capture info for lambdas for variables captured by reference.
8331 void generateInfoForLambdaCaptures(
8332 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8333 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8334 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8335 const auto *RD = VDType->getAsCXXRecordDecl();
8336 if (!RD || !RD->isLambda())
8337 return;
8338 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8339 CGF.getContext().getDeclAlign(VD));
8340 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8341 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8342 FieldDecl *ThisCapture = nullptr;
8343 RD->getCaptureFields(Captures, ThisCapture);
8344 if (ThisCapture) {
8345 LValue ThisLVal =
8346 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8347 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8348 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8349 VDLVal.getPointer(CGF));
8350 CombinedInfo.Exprs.push_back(VD);
8351 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8352 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8353 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8354 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8355 CombinedInfo.Sizes.push_back(
8356 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8357 CGF.Int64Ty, /*isSigned=*/true));
8358 CombinedInfo.Types.push_back(
8359 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8360 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8361 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8362 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8363 CombinedInfo.Mappers.push_back(nullptr);
8364 }
8365 for (const LambdaCapture &LC : RD->captures()) {
8366 if (!LC.capturesVariable())
8367 continue;
8368 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8369 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8370 continue;
8371 auto It = Captures.find(VD);
8372 assert(It != Captures.end() && "Found lambda capture without field.");
8373 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8374 if (LC.getCaptureKind() == LCK_ByRef) {
8375 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8376 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8377 VDLVal.getPointer(CGF));
8378 CombinedInfo.Exprs.push_back(VD);
8379 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8380 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8381 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8382 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8383 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8384 CGF.getTypeSize(
8385 VD->getType().getCanonicalType().getNonReferenceType()),
8386 CGF.Int64Ty, /*isSigned=*/true));
8387 } else {
8388 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8389 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8390 VDLVal.getPointer(CGF));
8391 CombinedInfo.Exprs.push_back(VD);
8392 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8393 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8394 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8395 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8396 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8397 }
8398 CombinedInfo.Types.push_back(
8399 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8400 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8401 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8402 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8403 CombinedInfo.Mappers.push_back(nullptr);
8404 }
8405 }
8406
8407 /// Set correct indices for lambda captures.
8408 void adjustMemberOfForLambdaCaptures(
8409 llvm::OpenMPIRBuilder &OMPBuilder,
8410 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8411 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8412 MapFlagsArrayTy &Types) const {
8413 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8414 // Set correct member_of idx for all implicit lambda captures.
8415 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8416 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8417 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8418 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8419 continue;
8420 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8421 assert(BasePtr && "Unable to find base lambda address.");
8422 int TgtIdx = -1;
8423 for (unsigned J = I; J > 0; --J) {
8424 unsigned Idx = J - 1;
8425 if (Pointers[Idx] != BasePtr)
8426 continue;
8427 TgtIdx = Idx;
8428 break;
8429 }
8430 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8431 // All other current entries will be MEMBER_OF the combined entry
8432 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8433 // 0xFFFF in the MEMBER_OF field).
8434 OpenMPOffloadMappingFlags MemberOfFlag =
8435 OMPBuilder.getMemberOfFlag(TgtIdx);
8436 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8437 }
8438 }
8439
8440 /// Generate the base pointers, section pointers, sizes, map types, and
8441 /// mappers associated to a given capture (all included in \a CombinedInfo).
8442 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8443 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8444 StructRangeInfoTy &PartialStruct) const {
8445 assert(!Cap->capturesVariableArrayType() &&
8446 "Not expecting to generate map info for a variable array type!");
8447
8448 // We need to know when we are generating information for the first component.
8449 const ValueDecl *VD = Cap->capturesThis()
8450 ? nullptr
8451 : Cap->getCapturedVar()->getCanonicalDecl();
8452
8453 // for map(to: lambda): skip here, processing it in
8454 // generateDefaultMapInfo
8455 if (LambdasMap.count(VD))
8456 return;
8457
8458 // If this declaration appears in an is_device_ptr clause, we just have to
8459 // pass the pointer by value. If it is a reference to a declaration, we just
8460 // pass its value.
8461 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8462 CombinedInfo.Exprs.push_back(VD);
8463 CombinedInfo.BasePointers.emplace_back(Arg);
8464 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8465 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8466 CombinedInfo.Pointers.push_back(Arg);
8467 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8468 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8469 /*isSigned=*/true));
8470 CombinedInfo.Types.push_back(
8471 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8472 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8473 CombinedInfo.Mappers.push_back(nullptr);
8474 return;
8475 }
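// E.g., for '#pragma omp target is_device_ptr(p)', 'p' is forwarded by
// value as a LITERAL | TARGET_PARAM argument whose recorded size is that of
// 'void *'; no separate data transfer is generated for it here.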
8476
8477 using MapData =
8478 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8479 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8480 const ValueDecl *, const Expr *>;
8481 SmallVector<MapData, 4> DeclComponentLists;
8482 // For member fields list in is_device_ptr, store it in
8483 // DeclComponentLists for generating components info.
8484 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8485 auto It = DevPointersMap.find(VD);
8486 if (It != DevPointersMap.end())
8487 for (const auto &MCL : It->second)
8488 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8489 /*IsImpicit = */ true, nullptr,
8490 nullptr);
8491 auto I = HasDevAddrsMap.find(VD);
8492 if (I != HasDevAddrsMap.end())
8493 for (const auto &MCL : I->second)
8494 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8495 /*IsImpicit = */ true, nullptr,
8496 nullptr);
8497 assert(CurDir.is<const OMPExecutableDirective *>() &&
8498 "Expect an executable directive");
8499 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8500 bool HasMapBasePtr = false;
8501 bool HasMapArraySec = false;
8502 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8503 const auto *EI = C->getVarRefs().begin();
8504 for (const auto L : C->decl_component_lists(VD)) {
8505 const ValueDecl *VDecl, *Mapper;
8506 // The expression is not correct if the mapping is implicit
8507 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8508 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8509 std::tie(VDecl, Components, Mapper) = L;
8510 assert(VDecl == VD && "We got information for the wrong declaration??");
8511 assert(!Components.empty() &&
8512 "Not expecting declaration with no component lists.");
8513 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8514 HasMapBasePtr = true;
8515 if (VD && E && VD->getType()->isAnyPointerType() &&
8516 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8517 HasMapArraySec = true;
8518 DeclComponentLists.emplace_back(Components, C->getMapType(),
8519 C->getMapTypeModifiers(),
8520 C->isImplicit(), Mapper, E);
8521 ++EI;
8522 }
8523 }
8524 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8525 const MapData &RHS) {
8526 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8527 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8528 bool HasPresent =
8529 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8530 bool HasAllocs = MapType == OMPC_MAP_alloc;
8531 MapModifiers = std::get<2>(RHS);
8532 MapType = std::get<1>(LHS);
8533 bool HasPresentR =
8534 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8535 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8536 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8537 });
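// After this sort, component lists carrying the 'present' modifier or an
// 'alloc' map type come first. E.g., map(present, to: s.x) is processed
// before map(to: s.y), presumably so the 'present' check fires before any
// other mapping work for the same declaration.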
8538
8539 // Find overlapping elements (including the offset from the base element).
8540 llvm::SmallDenseMap<
8541 const MapData *,
8542 llvm::SmallVector<
8543 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8544 4>
8545 OverlappedData;
8546 size_t Count = 0;
8547 for (const MapData &L : DeclComponentLists) {
8548 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8549 OpenMPMapClauseKind MapType;
8550 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8551 bool IsImplicit;
8552 const ValueDecl *Mapper;
8553 const Expr *VarRef;
8554 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8555 L;
8556 ++Count;
8557 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8558 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8559 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8560 VarRef) = L1;
8561 auto CI = Components.rbegin();
8562 auto CE = Components.rend();
8563 auto SI = Components1.rbegin();
8564 auto SE = Components1.rend();
8565 for (; CI != CE && SI != SE; ++CI, ++SI) {
8566 if (CI->getAssociatedExpression()->getStmtClass() !=
8567 SI->getAssociatedExpression()->getStmtClass())
8568 break;
8569 // Are we dealing with different variables/fields?
8570 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8571 break;
8572 }
8573 // Found overlapping if, at least for one component, reached the head
8574 // of the components list.
8575 if (CI == CE || SI == SE) {
8576 // Ignore it if it is the same component.
8577 if (CI == CE && SI == SE)
8578 continue;
8579 const auto It = (SI == SE) ? CI : SI;
8580 // If one component is a pointer and another one is a kind of
8581 // dereference of this pointer (array subscript, section, dereference,
8582 // etc.), it is not an overlap.
8583 // The same holds if one component is a base and another component is a
8584 // dereferenced pointer MemberExpr with the same base.
8585 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8586 (std::prev(It)->getAssociatedDeclaration() &&
8587 std::prev(It)
8588 ->getAssociatedDeclaration()
8589 ->getType()
8590 ->isPointerType()) ||
8591 (It->getAssociatedDeclaration() &&
8592 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8593 std::next(It) != CE && std::next(It) != SE))
8594 continue;
8595 const MapData &BaseData = CI == CE ? L : L1;
8596 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8597 SI == SE ? Components : Components1;
8598 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8599 OverlappedElements.getSecond().push_back(SubData);
8600 }
8601 }
8602 }
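// Illustration (not in the original source): map(tofrom: s) and
// map(to: s.x) reach the head of a component list together, so 's.x' is
// recorded as an overlapped element of the mapping of 's' and is later
// carved out of the combined entry for 's'.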
8603 // Sort the overlapped elements for each item.
8604 llvm::SmallVector<const FieldDecl *, 4> Layout;
8605 if (!OverlappedData.empty()) {
8606 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8607 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8608 while (BaseType != OrigType) {
8609 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8610 OrigType = BaseType->getPointeeOrArrayElementType();
8611 }
8612
8613 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8614 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8615 else {
8616 const auto *RD = BaseType->getAsRecordDecl();
8617 Layout.append(RD->field_begin(), RD->field_end());
8618 }
8619 }
8620 for (auto &Pair : OverlappedData) {
8621 llvm::stable_sort(
8622 Pair.getSecond(),
8623 [&Layout](
8624 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8625 OMPClauseMappableExprCommon::MappableExprComponentListRef
8626 Second) {
8627 auto CI = First.rbegin();
8628 auto CE = First.rend();
8629 auto SI = Second.rbegin();
8630 auto SE = Second.rend();
8631 for (; CI != CE && SI != SE; ++CI, ++SI) {
8632 if (CI->getAssociatedExpression()->getStmtClass() !=
8633 SI->getAssociatedExpression()->getStmtClass())
8634 break;
8635 // Are we dealing with different variables/fields?
8636 if (CI->getAssociatedDeclaration() !=
8637 SI->getAssociatedDeclaration())
8638 break;
8639 }
8640
8641 // Lists contain the same elements.
8642 if (CI == CE && SI == SE)
8643 return false;
8644
8645 // A list with fewer elements is less than a list with more elements.
8646 if (CI == CE || SI == SE)
8647 return CI == CE;
8648
8649 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8650 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8651 if (FD1->getParent() == FD2->getParent())
8652 return FD1->getFieldIndex() < FD2->getFieldIndex();
8653 const auto *It =
8654 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8655 return FD == FD1 || FD == FD2;
8656 });
8657 return *It == FD1;
8658 });
8659 }
8660
8661 // Associated with a capture, because the mapping flags depend on it.
8662 // Go through all of the elements that have overlapped elements.
8663 bool IsFirstComponentList = true;
8664 MapCombinedInfoTy StructBaseCombinedInfo;
8665 for (const auto &Pair : OverlappedData) {
8666 const MapData &L = *Pair.getFirst();
8667 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8668 OpenMPMapClauseKind MapType;
8669 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8670 bool IsImplicit;
8671 const ValueDecl *Mapper;
8672 const Expr *VarRef;
8673 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8674 L;
8675 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8676 OverlappedComponents = Pair.getSecond();
8677 generateInfoForComponentList(
8678 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8679 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8680 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8681 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8682 IsFirstComponentList = false;
8683 }
8684 // Go through other elements without overlapped elements.
8685 for (const MapData &L : DeclComponentLists) {
8686 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8687 OpenMPMapClauseKind MapType;
8688 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8689 bool IsImplicit;
8690 const ValueDecl *Mapper;
8691 const Expr *VarRef;
8692 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8693 L;
8694 auto It = OverlappedData.find(&L);
8695 if (It == OverlappedData.end())
8696 generateInfoForComponentList(
8697 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8698 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8699 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8700 /*ForDeviceAddr=*/false, VD, VarRef,
8701 /*OverlappedElements*/ std::nullopt,
8702 HasMapBasePtr && HasMapArraySec);
8703 IsFirstComponentList = false;
8704 }
8705 }
8706
8707 /// Generate the default map information for a given capture \a CI,
8708 /// record field declaration \a RI and captured value \a CV.
8709 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8710 const FieldDecl &RI, llvm::Value *CV,
8711 MapCombinedInfoTy &CombinedInfo) const {
8712 bool IsImplicit = true;
8713 // Do the default mapping.
8714 if (CI.capturesThis()) {
8715 CombinedInfo.Exprs.push_back(nullptr);
8716 CombinedInfo.BasePointers.push_back(CV);
8717 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8718 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8719 CombinedInfo.Pointers.push_back(CV);
8720 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8721 CombinedInfo.Sizes.push_back(
8722 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8723 CGF.Int64Ty, /*isSigned=*/true));
8724 // Default map type.
8725 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8726 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8727 } else if (CI.capturesVariableByCopy()) {
8728 const VarDecl *VD = CI.getCapturedVar();
8729 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8730 CombinedInfo.BasePointers.push_back(CV);
8731 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8732 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8733 CombinedInfo.Pointers.push_back(CV);
8734 if (!RI.getType()->isAnyPointerType()) {
8735 // We have to signal to the runtime captures passed by value that are
8736 // not pointers.
8737 CombinedInfo.Types.push_back(
8738 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8739 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8740 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8741 } else {
8742 // Pointers are implicitly mapped with a zero size and no flags
8743 // (other than first map that is added for all implicit maps).
8744 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8745 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8746 }
8747 auto I = FirstPrivateDecls.find(VD);
8748 if (I != FirstPrivateDecls.end())
8749 IsImplicit = I->getSecond();
8750 } else {
8751 assert(CI.capturesVariable() && "Expected captured reference.");
8752 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8753 QualType ElementType = PtrTy->getPointeeType();
8754 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8755 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8756 // The default map type for a scalar/complex type is 'to' because by
8757 // default the value doesn't have to be retrieved. For an aggregate
8758 // type, the default is 'tofrom'.
8759 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8760 const VarDecl *VD = CI.getCapturedVar();
8761 auto I = FirstPrivateDecls.find(VD);
8762 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8763 CombinedInfo.BasePointers.push_back(CV);
8764 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8765 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8766 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8767 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8768 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8769 AlignmentSource::Decl));
8770 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8771 } else {
8772 CombinedInfo.Pointers.push_back(CV);
8773 }
8774 if (I != FirstPrivateDecls.end())
8775 IsImplicit = I->getSecond();
8776 }
8777 // Every default map produces a single argument which is a target parameter.
8778 CombinedInfo.Types.back() |=
8779 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8780
8781 // Add flag stating this is an implicit map.
8782 if (IsImplicit)
8783 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8784
8785 // No user-defined mapper for default mapping.
8786 CombinedInfo.Mappers.push_back(nullptr);
8787 }
8788};
8789} // anonymous namespace
8790
8791// Try to extract the base declaration from a `this->x` expression if possible.
8792 static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
8793 if (!E)
8794 return nullptr;
8795
8796 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8797 if (const MemberExpr *ME =
8798 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8799 return ME->getMemberDecl();
8800 return nullptr;
8801}
8802
8803/// Emit a string constant containing the names of the values mapped to the
8804/// offloading runtime library.
8805llvm::Constant *
8806emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8807 MappableExprsHandler::MappingExprInfo &MapExprs) {
8808
8809 uint32_t SrcLocStrSize;
8810 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8811 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8812
8813 SourceLocation Loc;
8814 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8815 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8816 Loc = VD->getLocation();
8817 else
8818 Loc = MapExprs.getMapExpr()->getExprLoc();
8819 } else {
8820 Loc = MapExprs.getMapDecl()->getLocation();
8821 }
8822
8823 std::string ExprName;
8824 if (MapExprs.getMapExpr()) {
8825 PrintingPolicy P(CGF.getContext().getLangOpts());
8826 llvm::raw_string_ostream OS(ExprName);
8827 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8828 OS.flush();
8829 } else {
8830 ExprName = MapExprs.getMapDecl()->getNameAsString();
8831 }
8832
8833 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8834 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8835 PLoc.getLine(), PLoc.getColumn(),
8836 SrcLocStrSize);
8837}
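// The emitted string presumably follows libomptarget's
// ";file;name;line;column;;" layout, e.g. ";t.c;arr;3;20;;" (illustrative
// values).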
8838
8839/// Emit the arrays used to pass the captures and map information to the
8840/// offloading runtime library. If there is no map or capture information,
8841/// return nullptr by reference.
8842 static void emitOffloadingArrays(
8843 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8844 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8845 bool IsNonContiguous = false) {
8846 CodeGenModule &CGM = CGF.CGM;
8847
8848 // Reset the array information.
8849 Info.clearArrayInfo();
8850 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8851
8852 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8853 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8854 CGF.AllocaInsertPt->getIterator());
8855 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8856 CGF.Builder.GetInsertPoint());
8857
8858 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8859 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8860 };
8861 if (CGM.getCodeGenOpts().getDebugInfo() !=
8862 llvm::codegenoptions::NoDebugInfo) {
8863 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8864 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8865 FillInfoMap);
8866 }
8867
8868 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8869 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8870 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8871 }
8872 };
8873
8874 auto CustomMapperCB = [&](unsigned int I) {
8875 llvm::Value *MFunc = nullptr;
8876 if (CombinedInfo.Mappers[I]) {
8877 Info.HasMapper = true;
8878 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8879 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8880 }
8881 return MFunc;
8882 };
8883 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8884 /*IsNonContiguous=*/true, DeviceAddrCB,
8885 CustomMapperCB);
8886}
8887
8888/// Check for inner distribute directive.
8889static const OMPExecutableDirective *
8890 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8891 const auto *CS = D.getInnermostCapturedStmt();
8892 const auto *Body =
8893 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8894 const Stmt *ChildStmt =
8895 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8896
8897 if (const auto *NestedDir =
8898 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8899 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8900 switch (D.getDirectiveKind()) {
8901 case OMPD_target:
8902 // For now, treat 'target' with nested 'teams loop' as if it's
8903 // distributed (target teams distribute).
8904 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8905 return NestedDir;
8906 if (DKind == OMPD_teams) {
8907 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8908 /*IgnoreCaptured=*/true);
8909 if (!Body)
8910 return nullptr;
8911 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8912 if (const auto *NND =
8913 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8914 DKind = NND->getDirectiveKind();
8915 if (isOpenMPDistributeDirective(DKind))
8916 return NND;
8917 }
8918 }
8919 return nullptr;
8920 case OMPD_target_teams:
8921 if (isOpenMPDistributeDirective(DKind))
8922 return NestedDir;
8923 return nullptr;
8924 case OMPD_target_parallel:
8925 case OMPD_target_simd:
8926 case OMPD_target_parallel_for:
8927 case OMPD_target_parallel_for_simd:
8928 return nullptr;
8929 case OMPD_target_teams_distribute:
8930 case OMPD_target_teams_distribute_simd:
8931 case OMPD_target_teams_distribute_parallel_for:
8932 case OMPD_target_teams_distribute_parallel_for_simd:
8933 case OMPD_parallel:
8934 case OMPD_for:
8935 case OMPD_parallel_for:
8936 case OMPD_parallel_master:
8937 case OMPD_parallel_sections:
8938 case OMPD_for_simd:
8939 case OMPD_parallel_for_simd:
8940 case OMPD_cancel:
8941 case OMPD_cancellation_point:
8942 case OMPD_ordered:
8943 case OMPD_threadprivate:
8944 case OMPD_allocate:
8945 case OMPD_task:
8946 case OMPD_simd:
8947 case OMPD_tile:
8948 case OMPD_unroll:
8949 case OMPD_sections:
8950 case OMPD_section:
8951 case OMPD_single:
8952 case OMPD_master:
8953 case OMPD_critical:
8954 case OMPD_taskyield:
8955 case OMPD_barrier:
8956 case OMPD_taskwait:
8957 case OMPD_taskgroup:
8958 case OMPD_atomic:
8959 case OMPD_flush:
8960 case OMPD_depobj:
8961 case OMPD_scan:
8962 case OMPD_teams:
8963 case OMPD_target_data:
8964 case OMPD_target_exit_data:
8965 case OMPD_target_enter_data:
8966 case OMPD_distribute:
8967 case OMPD_distribute_simd:
8968 case OMPD_distribute_parallel_for:
8969 case OMPD_distribute_parallel_for_simd:
8970 case OMPD_teams_distribute:
8971 case OMPD_teams_distribute_simd:
8972 case OMPD_teams_distribute_parallel_for:
8973 case OMPD_teams_distribute_parallel_for_simd:
8974 case OMPD_target_update:
8975 case OMPD_declare_simd:
8976 case OMPD_declare_variant:
8977 case OMPD_begin_declare_variant:
8978 case OMPD_end_declare_variant:
8979 case OMPD_declare_target:
8980 case OMPD_end_declare_target:
8981 case OMPD_declare_reduction:
8982 case OMPD_declare_mapper:
8983 case OMPD_taskloop:
8984 case OMPD_taskloop_simd:
8985 case OMPD_master_taskloop:
8986 case OMPD_master_taskloop_simd:
8987 case OMPD_parallel_master_taskloop:
8988 case OMPD_parallel_master_taskloop_simd:
8989 case OMPD_requires:
8990 case OMPD_metadirective:
8991 case OMPD_unknown:
8992 default:
8993 llvm_unreachable("Unexpected directive.");
8994 }
8995 }
8996
8997 return nullptr;
8998}
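// As a sketch, the lookup above fires for nestings such as:
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// where the 'distribute' directive nested under 'target' and 'teams' is
// returned instead of null.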
8999
9000/// Emit the user-defined mapper function. The code generation follows the
9001/// pattern in the example below.
9002/// \code
9003/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9004/// void *base, void *begin,
9005/// int64_t size, int64_t type,
9006/// void *name = nullptr) {
9007/// // Allocate space for an array section first or add a base/begin for
9008/// // pointer dereference.
9009/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9010/// !maptype.IsDelete)
9011/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9012/// size*sizeof(Ty), clearToFromMember(type));
9013/// // Map members.
9014/// for (unsigned i = 0; i < size; i++) {
9015/// // For each component specified by this mapper:
9016/// for (auto c : begin[i]->all_components) {
9017/// if (c.hasMapper())
9018/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9019/// c.arg_type, c.arg_name);
9020/// else
9021/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9022/// c.arg_begin, c.arg_size, c.arg_type,
9023/// c.arg_name);
9024/// }
9025/// }
9026/// // Delete the array section.
9027/// if (size > 1 && maptype.IsDelete)
9028/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9029/// size*sizeof(Ty), clearToFromMember(type));
9030/// }
9031/// \endcode
9032 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9033 CodeGenFunction *CGF) {
9034 if (UDMMap.count(D) > 0)
9035 return;
9036 ASTContext &C = CGM.getContext();
9037 QualType Ty = D->getType();
9038 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9039 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9040 auto *MapperVarDecl =
9041 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9042 SourceLocation Loc = D->getLocation();
9043 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9044 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9045
9046 // Prepare mapper function arguments and attributes.
9047 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9048 C.VoidPtrTy, ImplicitParamKind::Other);
9049 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9050 ImplicitParamKind::Other);
9051 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9052 C.VoidPtrTy, ImplicitParamKind::Other);
9053 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9054 ImplicitParamKind::Other);
9055 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9056 ImplicitParamKind::Other);
9057 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9058 ImplicitParamKind::Other);
9059 FunctionArgList Args;
9060 Args.push_back(&HandleArg);
9061 Args.push_back(&BaseArg);
9062 Args.push_back(&BeginArg);
9063 Args.push_back(&SizeArg);
9064 Args.push_back(&TypeArg);
9065 Args.push_back(&NameArg);
9066 const CGFunctionInfo &FnInfo =
9067 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9068 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9069 SmallString<64> TyStr;
9070 llvm::raw_svector_ostream Out(TyStr);
9071 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9072 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9073 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9074 Name, &CGM.getModule());
9075 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9076 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9077 // Start the mapper function code generation.
9078 CodeGenFunction MapperCGF(CGM);
9079 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9080 // Compute the starting and end addresses of array elements.
9081 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9082 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9083 C.getPointerType(Int64Ty), Loc);
9084 // Prepare common arguments for array initialization and deletion.
9085 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9086 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9087 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9088 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9089 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9090 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9091 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9092 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9093 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9094 // Convert the size in bytes into the number of array elements.
9095 Size = MapperCGF.Builder.CreateExactUDiv(
9096 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9097 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9098 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9099 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9100 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9101 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9102 C.getPointerType(Int64Ty), Loc);
9103 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9104 MapperCGF.GetAddrOfLocalVar(&NameArg),
9105 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9106
9107 // Emit array initialization if this is an array section and \p MapType
9108 // indicates that memory allocation is required.
9109 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9110 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9111 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9112
9113 // Emit a for loop that iterates over \p Size elements and maps each of them.
9114
9115 // Emit the loop header block.
9116 MapperCGF.EmitBlock(HeadBB);
9117 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9118 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9119 // Evaluate whether the initial condition is satisfied.
9120 llvm::Value *IsEmpty =
9121 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9122 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9123 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9124
9125 // Emit the loop body block.
9126 MapperCGF.EmitBlock(BodyBB);
9127 llvm::BasicBlock *LastBB = BodyBB;
9128 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9129 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9130 PtrPHI->addIncoming(PtrBegin, EntryBB);
9131 Address PtrCurrent(PtrPHI, ElemTy,
9132 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9133 .getAlignment()
9134 .alignmentOfArrayElement(ElementSize));
9135 // Privatize the declared variable of mapper to be the current array element.
9136 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9137 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9138 (void)Scope.Privatize();
9139
9140 // Get map clause information. Fill up the arrays with all mapped variables.
9141 MappableExprsHandler::MapCombinedInfoTy Info;
9142 MappableExprsHandler MEHandler(*D, MapperCGF);
9143 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9144
9145 // Call the runtime API __tgt_mapper_num_components to get the number of
9146 // pre-existing components.
9147 llvm::Value *OffloadingArgs[] = {Handle};
9148 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9149 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9150 OMPRTL___tgt_mapper_num_components),
9151 OffloadingArgs);
9152 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9153 PreviousSize,
9154 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9155
9156 // Fill up the runtime mapper handle for all components.
9157 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9158 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9159 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9160 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9161 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9162 llvm::Value *CurSizeArg = Info.Sizes[I];
9163 llvm::Value *CurNameArg =
9164 (CGM.getCodeGenOpts().getDebugInfo() ==
9165 llvm::codegenoptions::NoDebugInfo)
9166 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9167 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9168
9169 // Extract the MEMBER_OF field from the map type.
9170 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9171 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9172 Info.Types[I]));
9173 llvm::Value *MemberMapType =
9174 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9175
9176 // Combine the map type inherited from user-defined mapper with that
9177 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9178 // bits of the \a MapType, which is the input argument of the mapper
9179 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9180 // bits of MemberMapType.
9181 // [OpenMP 5.0], 1.2.6. map-type decay.
9182 // | alloc | to | from | tofrom | release | delete
9183 // ----------------------------------------------------------
9184 // alloc | alloc | alloc | alloc | alloc | release | delete
9185 // to | alloc | to | alloc | to | release | delete
9186 // from | alloc | alloc | from | from | release | delete
9187 // tofrom | alloc | to | from | tofrom | release | delete
9188 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9189 MapType,
9190 MapperCGF.Builder.getInt64(
9191 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9192 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9193 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9194 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9195 llvm::BasicBlock *AllocElseBB =
9196 MapperCGF.createBasicBlock("omp.type.alloc.else");
9197 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9198 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9199 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9200 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9201 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9202 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9203 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9204 MapperCGF.EmitBlock(AllocBB);
9205 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9206 MemberMapType,
9207 MapperCGF.Builder.getInt64(
9208 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9209 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9210 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9211 MapperCGF.Builder.CreateBr(EndBB);
9212 MapperCGF.EmitBlock(AllocElseBB);
9213 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9214 LeftToFrom,
9215 MapperCGF.Builder.getInt64(
9216 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9217 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9218 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9219 // In case of to, clear OMP_MAP_FROM.
9220 MapperCGF.EmitBlock(ToBB);
9221 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9222 MemberMapType,
9223 MapperCGF.Builder.getInt64(
9224 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9225 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9226 MapperCGF.Builder.CreateBr(EndBB);
9227 MapperCGF.EmitBlock(ToElseBB);
9228 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9229 LeftToFrom,
9230 MapperCGF.Builder.getInt64(
9231 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9232 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9233 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9234 // In case of from, clear OMP_MAP_TO.
9235 MapperCGF.EmitBlock(FromBB);
9236 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9237 MemberMapType,
9238 MapperCGF.Builder.getInt64(
9239 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9240 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9241 // In case of tofrom, do nothing.
9242 MapperCGF.EmitBlock(EndBB);
9243 LastBB = EndBB;
9244 llvm::PHINode *CurMapType =
9245 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9246 CurMapType->addIncoming(AllocMapType, AllocBB);
9247 CurMapType->addIncoming(ToMapType, ToBB);
9248 CurMapType->addIncoming(FromMapType, FromBB);
9249 CurMapType->addIncoming(MemberMapType, ToElseBB);
9250
9251 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9252 CurSizeArg, CurMapType, CurNameArg};
9253 if (Info.Mappers[I]) {
9254 // Call the corresponding mapper function.
9255 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9256 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9257 assert(MapperFunc && "Expect a valid mapper function is available.");
9258 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9259 } else {
9260 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9261 // data structure.
9262 MapperCGF.EmitRuntimeCall(
9263 OMPBuilder.getOrCreateRuntimeFunction(
9264 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9265 OffloadingArgs);
9266 }
9267 }
9268
9269 // Update the pointer to point to the next element that needs to be mapped,
9270 // and check whether we have mapped all elements.
9271 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9272 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9273 PtrPHI->addIncoming(PtrNext, LastBB);
9274 llvm::Value *IsDone =
9275 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9276 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9277 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9278
9279 MapperCGF.EmitBlock(ExitBB);
9280 // Emit array deletion if this is an array section and \p MapType indicates
9281 // that deletion is required.
9282 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9283 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9284
9285 // Emit the function exit block.
9286 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9287 MapperCGF.FinishFunction();
9288 UDMMap.try_emplace(D, Fn);
9289 if (CGF) {
9290 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9291 Decls.second.push_back(D);
9292 }
9293}
9294
9295/// Emit the array initialization or deletion portion for user-defined mapper
9296/// code generation. First, it evaluates whether an array section is mapped and
9297/// whether the \a MapType instructs to delete this section. If \a IsInit is
9298/// true, and \a MapType indicates to not delete this array, array
9299/// initialization code is generated. If \a IsInit is false, and \a MapType
9300 /// indicates to delete this array, array deletion code is generated.
9301 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9302 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9303 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9304 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9305 bool IsInit) {
9306 StringRef Prefix = IsInit ? ".init" : ".del";
9307
9308 // Evaluate if this is an array section.
9309 llvm::BasicBlock *BodyBB =
9310 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9311 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9312 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9313 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9314 MapType,
9315 MapperCGF.Builder.getInt64(
9316 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9317 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9318 llvm::Value *DeleteCond;
9319 llvm::Value *Cond;
9320 if (IsInit) {
9321 // base != begin?
9322 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9323 // IsPtrAndObj?
9324 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9325 MapType,
9326 MapperCGF.Builder.getInt64(
9327 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9328 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9329 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9330 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9331 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9332 DeleteCond = MapperCGF.Builder.CreateIsNull(
9333 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9334 } else {
9335 Cond = IsArray;
9336 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9337 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9338 }
9339 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9340 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9341
9342 MapperCGF.EmitBlock(BodyBB);
9343 // Get the array size by multiplying element size and element number (i.e., \p
9344 // Size).
9345 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9346 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9347 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9348 // memory allocation/deletion purpose only.
9349 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9350 MapType,
9351 MapperCGF.Builder.getInt64(
9352 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9353 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9354 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9355 MapTypeArg = MapperCGF.Builder.CreateOr(
9356 MapTypeArg,
9357 MapperCGF.Builder.getInt64(
9358 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9359 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9360
9361 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9362 // data structure.
9363 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9364 ArraySize, MapTypeArg, MapName};
9365 MapperCGF.EmitRuntimeCall(
9366 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9367 OMPRTL___tgt_push_mapper_component),
9368 OffloadingArgs);
9369}
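// For reference, a user-level construct that exercises the init/delete paths
// above (the struct and field names here are purely illustrative):
//
//   struct Vec { int Len; double *Data; };
//   #pragma omp declare mapper(struct Vec V) map(V, V.Data[0 : V.Len])
//
// Mapping an array section of 'struct Vec' then emits the initialization call
// before the element loop and the deletion call after it.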
9370
9371 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9372 const OMPDeclareMapperDecl *D) {
9373 auto I = UDMMap.find(D);
9374 if (I != UDMMap.end())
9375 return I->second;
9376 emitUserDefinedMapper(D);
9377 return UDMMap.lookup(D);
9378}
9379
9380 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9381 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9382 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9383 const OMPLoopDirective &D)>
9384 SizeEmitter) {
9385 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9386 const OMPExecutableDirective *TD = &D;
9387 // Get nested teams distribute kind directive, if any. For now, treat
9388 // 'target_teams_loop' as if it's really a target_teams_distribute.
9389 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9390 Kind != OMPD_target_teams_loop)
9391 TD = getNestedDistributeDirective(CGM.getContext(), D);
9392 if (!TD)
9393 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9394
9395 const auto *LD = cast<OMPLoopDirective>(TD);
9396 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9397 return NumIterations;
9398 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9399}
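// E.g. for '#pragma omp target teams distribute parallel for' the helper
// above returns the precomputed trip count of the associated loop, whereas a
// bare '#pragma omp target' yields the i64 constant 0 (unknown).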
9400
9401static void
9402emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9403 const OMPExecutableDirective &D,
9404 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9405 bool RequiresOuterTask, const CapturedStmt &CS,
9406 bool OffloadingMandatory, CodeGenFunction &CGF) {
9407 if (OffloadingMandatory) {
9408 CGF.Builder.CreateUnreachable();
9409 } else {
9410 if (RequiresOuterTask) {
9411 CapturedVars.clear();
9412 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9413 }
9414 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9415 CapturedVars);
9416 }
9417}
9418
9419static llvm::Value *emitDeviceID(
9420 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9421 CodeGenFunction &CGF) {
9422 // Emit device ID if any.
9423 llvm::Value *DeviceID;
9424 if (Device.getPointer()) {
9425 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9426 Device.getInt() == OMPC_DEVICE_device_num) &&
9427 "Expected device_num modifier.");
9428 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9429 DeviceID =
9430 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9431 } else {
9432 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9433 }
9434 return DeviceID;
9435}
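// As a sketch, 'device(2)' yields the i64 value 2 after sign extension, while
// a missing device clause yields OMP_DEVICEID_UNDEF so the runtime falls back
// to the default device.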
9436
9437 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9438 CodeGenFunction &CGF) {
9439 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9440
9441 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9442 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9443 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9444 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9445 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9446 /*isSigned=*/false);
9447 }
9448 return DynCGroupMem;
9449}
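// E.g. '#pragma omp target ompx_dyn_cgroup_mem(1024)' requests 1024 bytes of
// dynamic cgroup memory for the kernel launch; without the clause the emitted
// value is the i32 constant 0.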
9450
9451 static void emitTargetCallKernelLaunch(
9452 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9453 const OMPExecutableDirective &D,
9454 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9455 const CapturedStmt &CS, bool OffloadingMandatory,
9456 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9457 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9458 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9459 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9460 const OMPLoopDirective &D)>
9461 SizeEmitter,
9462 CodeGenFunction &CGF, CodeGenModule &CGM) {
9463 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9464
9465 // Fill up the arrays with all the captured variables.
9466 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9467
9468 // Get mappable expression information.
9469 MappableExprsHandler MEHandler(D, CGF);
9470 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9471 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9472
9473 auto RI = CS.getCapturedRecordDecl()->field_begin();
9474 auto *CV = CapturedVars.begin();
9475 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9476 CE = CS.capture_end();
9477 CI != CE; ++CI, ++RI, ++CV) {
9478 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9479 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9480
9481 // VLA sizes are passed to the outlined region by copy and do not have map
9482 // information associated.
9483 if (CI->capturesVariableArrayType()) {
9484 CurInfo.Exprs.push_back(nullptr);
9485 CurInfo.BasePointers.push_back(*CV);
9486 CurInfo.DevicePtrDecls.push_back(nullptr);
9487 CurInfo.DevicePointers.push_back(
9488 MappableExprsHandler::DeviceInfoTy::None);
9489 CurInfo.Pointers.push_back(*CV);
9490 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9491 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9492 // Copy to the device as an argument. No need to retrieve it.
9493 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9494 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9495 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9496 CurInfo.Mappers.push_back(nullptr);
9497 } else {
9498 // If we have any information in the map clause, we use it, otherwise we
9499 // just do a default mapping.
9500 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9501 if (!CI->capturesThis())
9502 MappedVarSet.insert(CI->getCapturedVar());
9503 else
9504 MappedVarSet.insert(nullptr);
9505 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9506 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9507 // Generate correct mapping for variables captured by reference in
9508 // lambdas.
9509 if (CI->capturesVariable())
9510 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9511 CurInfo, LambdaPointers);
9512 }
9513 // We expect to have at least an element of information for this capture.
9514 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9515 "Non-existing map pointer for capture!");
9516 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9517 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9518 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9519 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9520 "Inconsistent map information sizes!");
9521
9522 // If there is an entry in PartialStruct it means we have a struct with
9523 // individual members mapped. Emit an extra combined entry.
9524 if (PartialStruct.Base.isValid()) {
9525 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9526 MEHandler.emitCombinedEntry(
9527 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9528 OMPBuilder, nullptr,
9529 !PartialStruct.PreliminaryMapData.BasePointers.empty());
9530 }
9531
9532 // We need to append the results of this capture to what we already have.
9533 CombinedInfo.append(CurInfo);
9534 }
9535 // Adjust MEMBER_OF flags for the lambdas captures.
9536 MEHandler.adjustMemberOfForLambdaCaptures(
9537 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9538 CombinedInfo.Pointers, CombinedInfo.Types);
9539 // Map any list items in a map clause that were not captures because they
9540 // weren't referenced within the construct.
9541 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9542
9543 CGOpenMPRuntime::TargetDataInfo Info;
9544 // Fill up the arrays and create the arguments.
9545 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9546 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9547 llvm::codegenoptions::NoDebugInfo;
9548 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9549 EmitDebug,
9550 /*ForEndCall=*/false);
9551
9552 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9553 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9554 CGF.VoidPtrTy, CGM.getPointerAlign());
9555 InputInfo.PointersArray =
9556 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9557 InputInfo.SizesArray =
9558 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9559 InputInfo.MappersArray =
9560 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9561 MapTypesArray = Info.RTArgs.MapTypesArray;
9562 MapNamesArray = Info.RTArgs.MapNamesArray;
9563
9564 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9565 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9566 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9567 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9568 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9569
9570 if (IsReverseOffloading) {
9571 // Reverse offloading is not supported, so just execute on the host.
9572 // FIXME: This fallback solution is incorrect since it ignores the
9573 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9574 // assert here and ensure SEMA emits an error.
9575 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9576 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9577 return;
9578 }
9579
9580 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9581 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9582
9583 llvm::Value *BasePointersArray =
9584 InputInfo.BasePointersArray.emitRawPointer(CGF);
9585 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9586 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9587 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9588
9589 auto &&EmitTargetCallFallbackCB =
9590 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9591 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9592 -> llvm::OpenMPIRBuilder::InsertPointTy {
9593 CGF.Builder.restoreIP(IP);
9594 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9595 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9596 return CGF.Builder.saveIP();
9597 };
9598
9599 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9600 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9601 llvm::Value *NumThreads =
9602 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9603 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9604 llvm::Value *NumIterations =
9605 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9606 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9607 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9608 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9609
9610 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9611 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9612 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9613
9614 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9615 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9616 DynCGGroupMem, HasNoWait);
9617
9618 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9619 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9620 DeviceID, RTLoc, AllocaIP));
9621 };
9622
9623 if (RequiresOuterTask)
9624 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9625 else
9626 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9627}
9628
9629static void
9630emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9631 const OMPExecutableDirective &D,
9632 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9633 bool RequiresOuterTask, const CapturedStmt &CS,
9634 bool OffloadingMandatory, CodeGenFunction &CGF) {
9635
9636 // Notify that the host version must be executed.
9637 auto &&ElseGen =
9638 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9639 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9640 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9641 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9642 };
9643
9644 if (RequiresOuterTask) {
9645 CodeGenFunction::OMPTargetDataInfo InputInfo;
9646 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9647 } else {
9648 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9649 }
9650}
9651
9652 void CGOpenMPRuntime::emitTargetCall(
9653 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9654 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9655 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9656 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9657 const OMPLoopDirective &D)>
9658 SizeEmitter) {
9659 if (!CGF.HaveInsertPoint())
9660 return;
9661
9662 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9663 CGM.getLangOpts().OpenMPOffloadMandatory;
9664
9665 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9666
9667 const bool RequiresOuterTask =
9668 D.hasClausesOfKind<OMPDependClause>() ||
9669 D.hasClausesOfKind<OMPNowaitClause>() ||
9670 D.hasClausesOfKind<OMPInReductionClause>() ||
9671 (CGM.getLangOpts().OpenMP >= 51 &&
9672 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9673 D.hasClausesOfKind<OMPThreadLimitClause>());
9674 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9675 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9676 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9677 PrePostActionTy &) {
9678 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9679 };
9680 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9681
9682 CodeGenFunction::OMPTargetDataInfo InputInfo;
9683 llvm::Value *MapTypesArray = nullptr;
9684 llvm::Value *MapNamesArray = nullptr;
9685
9686 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9687 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9688 OutlinedFnID, &InputInfo, &MapTypesArray,
9689 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9690 PrePostActionTy &) {
9691 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9692 RequiresOuterTask, CS, OffloadingMandatory,
9693 Device, OutlinedFnID, InputInfo, MapTypesArray,
9694 MapNamesArray, SizeEmitter, CGF, CGM);
9695 };
9696
9697 auto &&TargetElseGen =
9698 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9699 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9700 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9701 CS, OffloadingMandatory, CGF);
9702 };
9703
9704 // If we have a target function ID, we need to support offloading;
9705 // otherwise, just execute on the host. We must execute on the host
9706 // regardless of the 'if' clause condition if, e.g., the user does not
9707 // specify target triples.
9708 if (OutlinedFnID) {
9709 if (IfCond) {
9710 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9711 } else {
9712 RegionCodeGenTy ThenRCG(TargetThenGen);
9713 ThenRCG(CGF);
9714 }
9715 } else {
9716 RegionCodeGenTy ElseRCG(TargetElseGen);
9717 ElseRCG(CGF);
9718 }
9719}
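// Putting the pieces together, the control flow emitted for
// '#pragma omp target if(Cond)' with a valid outlined function ID is roughly
// (pseudocode sketch):
//
//   if (Cond) {
//     if (launching the kernel through the offloading runtime fails)
//       run the outlined host fallback;
//   } else {
//     run the outlined host fallback;
//   }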
9720
9721 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9722 StringRef ParentName) {
9723 if (!S)
9724 return;
9725
9726 // Codegen OMP target directives that offload compute to the device.
9727 bool RequiresDeviceCodegen =
9728 isa<OMPExecutableDirective>(S) &&
9729 isOpenMPTargetExecutionDirective(
9730 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9731
9732 if (RequiresDeviceCodegen) {
9733 const auto &E = *cast<OMPExecutableDirective>(S);
9734
9735 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9736 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9737
9738 // Is this a target region that should not be emitted as an entry point? If
9739 // so just signal we are done with this target region.
9740 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9741 return;
9742
9743 switch (E.getDirectiveKind()) {
9744 case OMPD_target:
9745 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9746 cast<OMPTargetDirective>(E));
9747 break;
9748 case OMPD_target_parallel:
9749 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9750 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9751 break;
9752 case OMPD_target_teams:
9753 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9754 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9755 break;
9756 case OMPD_target_teams_distribute:
9757 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9758 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9759 break;
9760 case OMPD_target_teams_distribute_simd:
9761 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9762 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9763 break;
9764 case OMPD_target_parallel_for:
9765 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9766 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9767 break;
9768 case OMPD_target_parallel_for_simd:
9769 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9770 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9771 break;
9772 case OMPD_target_simd:
9773 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9774 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9775 break;
9776 case OMPD_target_teams_distribute_parallel_for:
9777 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9778 CGM, ParentName,
9779 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9780 break;
9781 case OMPD_target_teams_distribute_parallel_for_simd:
9782 CodeGenFunction::
9783 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9784 CGM, ParentName,
9785 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9786 break;
9787 case OMPD_target_teams_loop:
9788 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9789 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9790 break;
9791 case OMPD_target_parallel_loop:
9792 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9793 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9794 break;
9795 case OMPD_parallel:
9796 case OMPD_for:
9797 case OMPD_parallel_for:
9798 case OMPD_parallel_master:
9799 case OMPD_parallel_sections:
9800 case OMPD_for_simd:
9801 case OMPD_parallel_for_simd:
9802 case OMPD_cancel:
9803 case OMPD_cancellation_point:
9804 case OMPD_ordered:
9805 case OMPD_threadprivate:
9806 case OMPD_allocate:
9807 case OMPD_task:
9808 case OMPD_simd:
9809 case OMPD_tile:
9810 case OMPD_unroll:
9811 case OMPD_sections:
9812 case OMPD_section:
9813 case OMPD_single:
9814 case OMPD_master:
9815 case OMPD_critical:
9816 case OMPD_taskyield:
9817 case OMPD_barrier:
9818 case OMPD_taskwait:
9819 case OMPD_taskgroup:
9820 case OMPD_atomic:
9821 case OMPD_flush:
9822 case OMPD_depobj:
9823 case OMPD_scan:
9824 case OMPD_teams:
9825 case OMPD_target_data:
9826 case OMPD_target_exit_data:
9827 case OMPD_target_enter_data:
9828 case OMPD_distribute:
9829 case OMPD_distribute_simd:
9830 case OMPD_distribute_parallel_for:
9831 case OMPD_distribute_parallel_for_simd:
9832 case OMPD_teams_distribute:
9833 case OMPD_teams_distribute_simd:
9834 case OMPD_teams_distribute_parallel_for:
9835 case OMPD_teams_distribute_parallel_for_simd:
9836 case OMPD_target_update:
9837 case OMPD_declare_simd:
9838 case OMPD_declare_variant:
9839 case OMPD_begin_declare_variant:
9840 case OMPD_end_declare_variant:
9841 case OMPD_declare_target:
9842 case OMPD_end_declare_target:
9843 case OMPD_declare_reduction:
9844 case OMPD_declare_mapper:
9845 case OMPD_taskloop:
9846 case OMPD_taskloop_simd:
9847 case OMPD_master_taskloop:
9848 case OMPD_master_taskloop_simd:
9849 case OMPD_parallel_master_taskloop:
9850 case OMPD_parallel_master_taskloop_simd:
9851 case OMPD_requires:
9852 case OMPD_metadirective:
9853 case OMPD_unknown:
9854 default:
9855 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9856 }
9857 return;
9858 }
9859
9860 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9861 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9862 return;
9863
9864 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9865 return;
9866 }
9867
9868 // If this is a lambda function, look into its body.
9869 if (const auto *L = dyn_cast<LambdaExpr>(S))
9870 S = L->getBody();
9871
9872 // Keep looking for target regions recursively.
9873 for (const Stmt *II : S->children())
9874 scanForTargetRegionsFunctions(II, ParentName);
9875}
9876
9877static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9878 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9879 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9880 if (!DevTy)
9881 return false;
9882 // Do not emit device_type(nohost) functions for the host.
9883 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9884 return true;
9885 // Do not emit device_type(host) functions for the device.
9886 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9887 return true;
9888 return false;
9889}
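// For example, with
//
//   #pragma omp begin declare target device_type(nohost)
//   void DeviceOnly();   // hypothetical function
//   #pragma omp end declare target
//
// the host compilation treats DeviceOnly as not emitted, and
// device_type(host) behaves symmetrically for the device compilation.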
9890
9891 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9892 // If emitting code for the host, we do not process FD here. Instead we do
9893 // the normal code generation.
9894 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9895 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9896 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9897 CGM.getLangOpts().OpenMPIsTargetDevice))
9898 return true;
9899 return false;
9900 }
9901
9902 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9903 // Try to detect target regions in the function.
9904 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9905 StringRef Name = CGM.getMangledName(GD);
9906 scanForTargetRegionsFunctions(FD->getBody(), Name);
9907 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9908 CGM.getLangOpts().OpenMPIsTargetDevice))
9909 return true;
9910 }
9911
9912 // Do not emit the function if it is not marked as declare target.
9913 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9914 AlreadyEmittedTargetDecls.count(VD) == 0;
9915}
9916
9917 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9918 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9919 CGM.getLangOpts().OpenMPIsTargetDevice))
9920 return true;
9921
9922 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9923 return false;
9924
9925 // Check if there are Ctors/Dtors in this declaration and look for target
9926 // regions in it. We use the complete variant to produce the kernel name
9927 // mangling.
9928 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9929 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9930 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9931 StringRef ParentName =
9932 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9933 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9934 }
9935 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9936 StringRef ParentName =
9937 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9938 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9939 }
9940 }
9941
9942 // Do not emit the variable if it is not marked as declare target.
9943 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9944 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9945 cast<VarDecl>(GD.getDecl()));
9946 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9947 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9948 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9949 HasRequiresUnifiedSharedMemory)) {
9950 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9951 return true;
9952 }
9953 return false;
9954}
9955
9956 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9957 llvm::Constant *Addr) {
9958 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9959 !CGM.getLangOpts().OpenMPIsTargetDevice)
9960 return;
9961
9962 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9963 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9964
9965 // If this is an 'extern' declaration we defer to the canonical definition and
9966 // do not emit an offloading entry.
9967 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9968 VD->hasExternalStorage())
9969 return;
9970
9971 if (!Res) {
9972 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9973 // Register non-target variables being emitted in device code (debug info
9974 // may cause this).
9975 StringRef VarName = CGM.getMangledName(VD);
9976 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9977 }
9978 return;
9979 }
9980
9981 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9982 auto LinkageForVariable = [&VD, this]() {
9983 return CGM.getLLVMLinkageVarDefinition(VD);
9984 };
9985
9986 std::vector<llvm::GlobalVariable *> GeneratedRefs;
9987 OMPBuilder.registerTargetGlobalVariable(
9988 convertCaptureClause(VD), convertDeviceClause(VD),
9989 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9990 VD->isExternallyVisible(),
9991 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9992 VD->getCanonicalDecl()->getBeginLoc()),
9993 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9994 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9995 CGM.getTypes().ConvertTypeForMem(
9996 CGM.getContext().getPointerType(VD->getType())),
9997 Addr);
9998
9999 for (auto *ref : GeneratedRefs)
10000 CGM.addCompilerUsedGlobal(ref);
10001}
10002
10003 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10004 if (isa<FunctionDecl>(GD.getDecl()) ||
10005 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10006 return emitTargetFunctions(GD);
10007
10008 return emitTargetGlobalVariable(GD);
10009}
10010
10011 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10012 for (const VarDecl *VD : DeferredGlobalVariables) {
10013 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10014 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10015 if (!Res)
10016 continue;
10017 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10018 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10019 !HasRequiresUnifiedSharedMemory) {
10020 CGM.EmitGlobal(VD);
10021 } else {
10022 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10023 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10024 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10025 HasRequiresUnifiedSharedMemory)) &&
10026 "Expected link clause or to clause with unified memory.");
10027 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10028 }
10029 }
10030}
10031
10032 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10033 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10034 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10035 " Expected target-based directive.");
10036}
10037
10038 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10039 for (const OMPClause *Clause : D->clauselists()) {
10040 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10041 HasRequiresUnifiedSharedMemory = true;
10042 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10043 } else if (const auto *AC =
10044 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10045 switch (AC->getAtomicDefaultMemOrderKind()) {
10046 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10047 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10048 break;
10049 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10050 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10051 break;
10052 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10053 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10054 break;
10055 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10056 break;
10057 }
10058 }
10059 }
10060}
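// E.g. '#pragma omp requires atomic_default_mem_order(seq_cst)' makes atomic
// constructs without an explicit memory-order clause default to sequentially
// consistent ordering via the value recorded here.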
10061
10062llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10063 return RequiresAtomicOrdering;
10064}
10065
10066 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10067 LangAS &AS) {
10068 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10069 return false;
10070 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10071 switch (A->getAllocatorType()) {
10072 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10073 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10074 // Not supported, fallback to the default mem space.
10075 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10076 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10077 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10078 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10079 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10080 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10081 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10082 AS = LangAS::Default;
10083 return true;
10084 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10085 llvm_unreachable("Expected predefined allocator for the variables with the "
10086 "static storage.");
10087 }
10088 return false;
10089}
10090
10091 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10092 return HasRequiresUnifiedSharedMemory;
10093}
10094
10095 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10096 CodeGenModule &CGM)
10097 : CGM(CGM) {
10098 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10099 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10100 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10101 }
10102}
10103
10104 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10105 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10106 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10107}
10108
10109 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10110 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10111 return true;
10112
10113 const auto *D = cast<FunctionDecl>(GD.getDecl());
10114 // Do not emit the function if it is marked as declare target, as it was
10115 // already emitted.
10116 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10117 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10118 if (auto *F = dyn_cast_or_null<llvm::Function>(
10119 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10120 return !F->isDeclaration();
10121 return false;
10122 }
10123 return true;
10124 }
10125
10126 return !AlreadyEmittedTargetDecls.insert(D).second;
10127}
10128
10129 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10130 const OMPExecutableDirective &D,
10131 SourceLocation Loc,
10132 llvm::Function *OutlinedFn,
10133 ArrayRef<llvm::Value *> CapturedVars) {
10134 if (!CGF.HaveInsertPoint())
10135 return;
10136
10137 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10138 CodeGenFunction::RunCleanupsScope Scope(CGF);
10139
10140 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10141 llvm::Value *Args[] = {
10142 RTLoc,
10143 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10144 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10145 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10146 RealArgs.append(std::begin(Args), std::end(Args));
10147 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10148
10149 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10150 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10151 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10152}
10153
10154 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10155 const Expr *NumTeams,
10156 const Expr *ThreadLimit,
10157 SourceLocation Loc) {
10158 if (!CGF.HaveInsertPoint())
10159 return;
10160
10161 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10162
10163 llvm::Value *NumTeamsVal =
10164 NumTeams
10165 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10166 CGF.CGM.Int32Ty, /* isSigned = */ true)
10167 : CGF.Builder.getInt32(0);
10168
10169 llvm::Value *ThreadLimitVal =
10170 ThreadLimit
10171 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10172 CGF.CGM.Int32Ty, /* isSigned = */ true)
10173 : CGF.Builder.getInt32(0);
10174
10175 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10176 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10177 ThreadLimitVal};
10178 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10179 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10180 PushNumTeamsArgs);
10181}
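// As a sketch, '#pragma omp teams num_teams(4) thread_limit(64)' lowers to
//   __kmpc_push_num_teams(&loc, gtid, 4, 64);
// ahead of the __kmpc_fork_teams call emitted by emitTeamsCall above.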
10182
10183 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10184 const Expr *ThreadLimit,
10185 SourceLocation Loc) {
10186 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10187 llvm::Value *ThreadLimitVal =
10188 ThreadLimit
10189 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10190 CGF.CGM.Int32Ty, /* isSigned = */ true)
10191 : CGF.Builder.getInt32(0);
10192
10193 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10194 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10195 ThreadLimitVal};
10196 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10197 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10198 ThreadLimitArgs);
10199}
10200
10201 void CGOpenMPRuntime::emitTargetDataCalls(
10202 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10203 const Expr *Device, const RegionCodeGenTy &CodeGen,
10204 CGOpenMPRuntime::TargetDataInfo &Info) {
10205 if (!CGF.HaveInsertPoint())
10206 return;
10207
10208 // Action used to replace the default codegen action and turn privatization
10209 // off.
10210 PrePostActionTy NoPrivAction;
10211
10212 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10213
10214 llvm::Value *IfCondVal = nullptr;
10215 if (IfCond)
10216 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10217
10218 // Emit device ID if any.
10219 llvm::Value *DeviceID = nullptr;
10220 if (Device) {
10221 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10222 CGF.Int64Ty, /*isSigned=*/true);
10223 } else {
10224 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10225 }
10226
10227 // Fill up the arrays with all the mapped variables.
10228 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10229 auto GenMapInfoCB =
10230 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10231 CGF.Builder.restoreIP(CodeGenIP);
10232 // Get map clause information.
10233 MappableExprsHandler MEHandler(D, CGF);
10234 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10235
10236 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10237 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10238 };
10239 if (CGM.getCodeGenOpts().getDebugInfo() !=
10240 llvm::codegenoptions::NoDebugInfo) {
10241 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10242 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10243 FillInfoMap);
10244 }
10245
10246 return CombinedInfo;
10247 };
10248 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10249 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10250 CGF.Builder.restoreIP(CodeGenIP);
10251 switch (BodyGenType) {
10252 case BodyGenTy::Priv:
10253 if (!Info.CaptureDeviceAddrMap.empty())
10254 CodeGen(CGF);
10255 break;
10256 case BodyGenTy::DupNoPriv:
10257 if (!Info.CaptureDeviceAddrMap.empty()) {
10258 CodeGen.setAction(NoPrivAction);
10259 CodeGen(CGF);
10260 }
10261 break;
10262 case BodyGenTy::NoPriv:
10263 if (Info.CaptureDeviceAddrMap.empty()) {
10264 CodeGen.setAction(NoPrivAction);
10265 CodeGen(CGF);
10266 }
10267 break;
10268 }
10269 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10270 CGF.Builder.GetInsertPoint());
10271 };
10272
10273 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10274 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10275 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10276 }
10277 };
10278
10279 auto CustomMapperCB = [&](unsigned int I) {
10280 llvm::Value *MFunc = nullptr;
10281 if (CombinedInfo.Mappers[I]) {
10282 Info.HasMapper = true;
10283 MFunc = getOrCreateUserDefinedMapperFunc(
10284 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10285 }
10286 return MFunc;
10287 };
10288
10289 // Source location for the ident struct
10290 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10291
10292 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10293 CGF.AllocaInsertPt->getIterator());
10294 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10295 CGF.Builder.GetInsertPoint());
10296 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10297 CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10298 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10299 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10300}
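// For reference, the shape of a construct lowered here (illustrative names):
//
//   #pragma omp target data map(tofrom : A[0:N]) use_device_ptr(A)
//   { /* body observes the device address of A via the privatized pointer */ }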
10301
10302 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10303 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10304 const Expr *Device) {
10305 if (!CGF.HaveInsertPoint())
10306 return;
10307
10308 assert((isa<OMPTargetEnterDataDirective>(D) ||
10309 isa<OMPTargetExitDataDirective>(D) ||
10310 isa<OMPTargetUpdateDirective>(D)) &&
10311 "Expecting either target enter, exit data, or update directives.");
10312
10313 CodeGenFunction::OMPTargetDataInfo InputInfo;
10314 llvm::Value *MapTypesArray = nullptr;
10315 llvm::Value *MapNamesArray = nullptr;
10316 // Generate the code for the opening of the data environment.
10317 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10318 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10319 // Emit device ID if any.
10320 llvm::Value *DeviceID = nullptr;
10321 if (Device) {
10322 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10323 CGF.Int64Ty, /*isSigned=*/true);
10324 } else {
10325 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10326 }
10327
10328 // Emit the number of elements in the offloading arrays.
10329 llvm::Constant *PointerNum =
10330 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10331
10332 // Source location for the ident struct
10333 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10334
10335 llvm::Value *OffloadingArgs[] = {
10336 RTLoc,
10337 DeviceID,
10338 PointerNum,
10339 InputInfo.BasePointersArray.emitRawPointer(CGF),
10340 InputInfo.PointersArray.emitRawPointer(CGF),
10341 InputInfo.SizesArray.emitRawPointer(CGF),
10342 MapTypesArray,
10343 MapNamesArray,
10344 InputInfo.MappersArray.emitRawPointer(CGF)};
10345
10346 // Select the right runtime function call for each standalone
10347 // directive.
10348 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10349 RuntimeFunction RTLFn;
10350 switch (D.getDirectiveKind()) {
10351 case OMPD_target_enter_data:
10352 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10353 : OMPRTL___tgt_target_data_begin_mapper;
10354 break;
10355 case OMPD_target_exit_data:
10356 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10357 : OMPRTL___tgt_target_data_end_mapper;
10358 break;
10359 case OMPD_target_update:
10360 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10361 : OMPRTL___tgt_target_data_update_mapper;
10362 break;
10363 case OMPD_parallel:
10364 case OMPD_for:
10365 case OMPD_parallel_for:
10366 case OMPD_parallel_master:
10367 case OMPD_parallel_sections:
10368 case OMPD_for_simd:
10369 case OMPD_parallel_for_simd:
10370 case OMPD_cancel:
10371 case OMPD_cancellation_point:
10372 case OMPD_ordered:
10373 case OMPD_threadprivate:
10374 case OMPD_allocate:
10375 case OMPD_task:
10376 case OMPD_simd:
10377 case OMPD_tile:
10378 case OMPD_unroll:
10379 case OMPD_sections:
10380 case OMPD_section:
10381 case OMPD_single:
10382 case OMPD_master:
10383 case OMPD_critical:
10384 case OMPD_taskyield:
10385 case OMPD_barrier:
10386 case OMPD_taskwait:
10387 case OMPD_taskgroup:
10388 case OMPD_atomic:
10389 case OMPD_flush:
10390 case OMPD_depobj:
10391 case OMPD_scan:
10392 case OMPD_teams:
10393 case OMPD_target_data:
10394 case OMPD_distribute:
10395 case OMPD_distribute_simd:
10396 case OMPD_distribute_parallel_for:
10397 case OMPD_distribute_parallel_for_simd:
10398 case OMPD_teams_distribute:
10399 case OMPD_teams_distribute_simd:
10400 case OMPD_teams_distribute_parallel_for:
10401 case OMPD_teams_distribute_parallel_for_simd:
10402 case OMPD_declare_simd:
10403 case OMPD_declare_variant:
10404 case OMPD_begin_declare_variant:
10405 case OMPD_end_declare_variant:
10406 case OMPD_declare_target:
10407 case OMPD_end_declare_target:
10408 case OMPD_declare_reduction:
10409 case OMPD_declare_mapper:
10410 case OMPD_taskloop:
10411 case OMPD_taskloop_simd:
10412 case OMPD_master_taskloop:
10413 case OMPD_master_taskloop_simd:
10414 case OMPD_parallel_master_taskloop:
10415 case OMPD_parallel_master_taskloop_simd:
10416 case OMPD_target:
10417 case OMPD_target_simd:
10418 case OMPD_target_teams_distribute:
10419 case OMPD_target_teams_distribute_simd:
10420 case OMPD_target_teams_distribute_parallel_for:
10421 case OMPD_target_teams_distribute_parallel_for_simd:
10422 case OMPD_target_teams:
10423 case OMPD_target_parallel:
10424 case OMPD_target_parallel_for:
10425 case OMPD_target_parallel_for_simd:
10426 case OMPD_requires:
10427 case OMPD_metadirective:
10428 case OMPD_unknown:
10429 default:
10430 llvm_unreachable("Unexpected standalone target data directive.");
10431 break;
10432 }
10433 CGF.EmitRuntimeCall(
10434 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10435 OffloadingArgs);
10436 };
10437
10438 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10439 &MapNamesArray](CodeGenFunction &CGF,
10440 PrePostActionTy &) {
10441 // Fill up the arrays with all the mapped variables.
10442 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10443
10444 // Get map clause information.
10445 MappableExprsHandler MEHandler(D, CGF);
10446 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10447
10448 CGOpenMPRuntime::TargetDataInfo Info;
10449 // Fill up the arrays and create the arguments.
10450 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10451 /*IsNonContiguous=*/true);
10452 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10453 D.hasClausesOfKind<OMPNowaitClause>();
10454 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10455 llvm::codegenoptions::NoDebugInfo;
10456 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10457 EmitDebug,
10458 /*ForEndCall=*/false);
10459 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10460 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10461 CGF.VoidPtrTy, CGM.getPointerAlign());
10462 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10463 CGM.getPointerAlign());
10464 InputInfo.SizesArray =
10465 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10466 InputInfo.MappersArray =
10467 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10468 MapTypesArray = Info.RTArgs.MapTypesArray;
10469 MapNamesArray = Info.RTArgs.MapNamesArray;
10470 if (RequiresOuterTask)
10471 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10472 else
10473 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10474 };
10475
10476 if (IfCond) {
10477 emitIfClause(CGF, IfCond, TargetThenGen,
10478 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10479 } else {
10480 RegionCodeGenTy ThenRCG(TargetThenGen);
10481 ThenRCG(CGF);
10482 }
10483}
10484
10485namespace {
10486 /// Kind of parameter in a function with 'declare simd' directive.
10487enum ParamKindTy {
10488 Linear,
10489 LinearRef,
10490 LinearUVal,
10491 LinearVal,
10492 Uniform,
10493 Vector,
10494};
10495/// Attribute set of the parameter.
10496struct ParamAttrTy {
10497 ParamKindTy Kind = Vector;
10498 llvm::APSInt StrideOrArg;
10499 llvm::APSInt Alignment;
10500 bool HasVarStride = false;
10501};
10502} // namespace
10503
10504static unsigned evaluateCDTSize(const FunctionDecl *FD,
10505 ArrayRef<ParamAttrTy> ParamAttrs) {
10506 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10507 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10508 // of that clause. The VLEN value must be a power of 2.
10509 // Otherwise the notion of the function's "characteristic data type" (CDT)
10510 // is used to compute the vector length.
10511 // CDT is defined in the following order:
10512 // a) For non-void function, the CDT is the return type.
10513 // b) If the function has any non-uniform, non-linear parameters, then the
10514 // CDT is the type of the first such parameter.
10515 // c) If the CDT determined by a) or b) above is a struct, union, or class
10516 // type that is passed by value (except for the type that maps to the
10517 // built-in complex data type), the characteristic data type is int.
10518 // d) If none of the above three cases is applicable, the CDT is int.
10519 // The VLEN is then determined based on the CDT and the size of vector
10520 // register of that ISA for which current vector version is generated. The
10521 // VLEN is computed using the formula below:
10522 // VLEN = sizeof(vector_register) / sizeof(CDT),
10523 // where the vector register size is specified in section 3.2.1 Registers
10524 // and the Stack Frame of the original AMD64 ABI document.
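// Illustrative worked example (not part of the original source): for a
// SIMD-enabled function returning 'float', the CDT is 'float' (32 bits);
// targeting AVX2 with its 256-bit vector registers, VLEN = 256 / 32 = 8.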
10525 QualType RetType = FD->getReturnType();
10526 if (RetType.isNull())
10527 return 0;
10528 ASTContext &C = FD->getASTContext();
10529 QualType CDT;
10530 if (!RetType.isNull() && !RetType->isVoidType()) {
10531 CDT = RetType;
10532 } else {
10533 unsigned Offset = 0;
10534 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10535 if (ParamAttrs[Offset].Kind == Vector)
10536 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10537 ++Offset;
10538 }
10539 if (CDT.isNull()) {
10540 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10541 if (ParamAttrs[I + Offset].Kind == Vector) {
10542 CDT = FD->getParamDecl(I)->getType();
10543 break;
10544 }
10545 }
10546 }
10547 }
10548 if (CDT.isNull())
10549 CDT = C.IntTy;
10550 CDT = CDT->getCanonicalTypeUnqualified();
10551 if (CDT->isRecordType() || CDT->isUnionType())
10552 CDT = C.IntTy;
10553 return C.getTypeSize(CDT);
10554}
10555
10556/// Mangle the parameter part of the vector function name according to the
10557/// parameters' OpenMP classification. The mangling function is defined in
10558/// section 4.5 of the AAVFABI (2021Q1).
10559static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10560 SmallString<256> Buffer;
10561 llvm::raw_svector_ostream Out(Buffer);
10562 for (const auto &ParamAttr : ParamAttrs) {
10563 switch (ParamAttr.Kind) {
10564 case Linear:
10565 Out << 'l';
10566 break;
10567 case LinearRef:
10568 Out << 'R';
10569 break;
10570 case LinearUVal:
10571 Out << 'U';
10572 break;
10573 case LinearVal:
10574 Out << 'L';
10575 break;
10576 case Uniform:
10577 Out << 'u';
10578 break;
10579 case Vector:
10580 Out << 'v';
10581 break;
10582 }
10583 if (ParamAttr.HasVarStride)
10584 Out << "s" << ParamAttr.StrideOrArg;
10585 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10586 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10587 // Don't print the step value if it is not present or if it is
10588 // equal to 1.
10589 if (ParamAttr.StrideOrArg < 0)
10590 Out << 'n' << -ParamAttr.StrideOrArg;
10591 else if (ParamAttr.StrideOrArg != 1)
10592 Out << ParamAttr.StrideOrArg;
10593 }
10594
10595 if (!!ParamAttr.Alignment)
10596 Out << 'a' << ParamAttr.Alignment;
10597 }
10598
10599 return std::string(Out.str());
10600}
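// Illustrative example (not part of the original source): for the parameter
// attributes {Uniform, Linear with step 2, Vector}, the loop above emits
// "ul2v"; if the vector parameter also carried alignment 16, the result
// would be "ul2va16".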
10601
10602static void
10603emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10604 const llvm::APSInt &VLENVal,
10605 ArrayRef<ParamAttrTy> ParamAttrs,
10606 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10607 struct ISADataTy {
10608 char ISA;
10609 unsigned VecRegSize;
10610 };
10611 ISADataTy ISAData[] = {
10612 {
10613 'b', 128
10614 }, // SSE
10615 {
10616 'c', 256
10617 }, // AVX
10618 {
10619 'd', 256
10620 }, // AVX2
10621 {
10622 'e', 512
10623 }, // AVX512
10624 };
10625 llvm::SmallVector<char, 2> Masked;
10626 switch (State) {
10627 case OMPDeclareSimdDeclAttr::BS_Undefined:
10628 Masked.push_back('N');
10629 Masked.push_back('M');
10630 break;
10631 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10632 Masked.push_back('N');
10633 break;
10634 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10635 Masked.push_back('M');
10636 break;
10637 }
10638 for (char Mask : Masked) {
10639 for (const ISADataTy &Data : ISAData) {
10640 SmallString<256> Buffer;
10641 llvm::raw_svector_ostream Out(Buffer);
10642 Out << "_ZGV" << Data.ISA << Mask;
10643 if (!VLENVal) {
10644 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10645 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10646 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10647 } else {
10648 Out << VLENVal;
10649 }
10650 Out << mangleVectorParameters(ParamAttrs);
10651 Out << '_' << Fn->getName();
10652 Fn->addFnAttr(Out.str());
10653 }
10654 }
10655}
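// Illustrative example (not part of the original source): a 'declare simd'
// on 'double add(double x)' with no simdlen and no branch-state clause gets
// one attribute per mask/ISA pair, e.g. "_ZGVbN2v_add" and "_ZGVbM2v_add"
// for SSE (128 / 64 = 2 lanes), "_ZGVcN4v_add" for AVX, and so on.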
10656
10657// These are the functions needed to mangle the name of the
10658// vector functions generated by the compiler, according to the rules
10659// defined in the "Vector Function ABI specifications for AArch64",
10660// available at
10661// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10662
10663/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10664static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10665 QT = QT.getCanonicalType();
10666
10667 if (QT->isVoidType())
10668 return false;
10669
10670 if (Kind == ParamKindTy::Uniform)
10671 return false;
10672
10673 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10674 return false;
10675
10676 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10677 !QT->isReferenceType())
10678 return false;
10679
10680 return true;
10681}
10682
10683/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10684static bool getAArch64PBV(QualType QT, ASTContext &C) {
10685 QT = QT.getCanonicalType();
10686 unsigned Size = C.getTypeSize(QT);
10687
10688 // Only scalars and complex types within 16 bytes wide set PBV to true.
10689 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10690 return false;
10691
10692 if (QT->isFloatingType())
10693 return true;
10694
10695 if (QT->isIntegerType())
10696 return true;
10697
10698 if (QT->isPointerType())
10699 return true;
10700
10701 // TODO: Add support for complex types (section 3.1.2, item 2).
10702
10703 return false;
10704}
10705
10706/// Computes the lane size (LS) of a return type or of an input parameter,
10707/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10708/// TODO: Add support for references, section 3.2.1, item 1.
10709static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10710 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10711 QualType PTy = QT.getCanonicalType()->getPointeeType();
10712 if (getAArch64PBV(PTy, C))
10713 return C.getTypeSize(PTy);
10714 }
10715 if (getAArch64PBV(QT, C))
10716 return C.getTypeSize(QT);
10717
10718 return C.getTypeSize(C.getUIntPtrType());
10719}
10720
10721// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10722// signature of the scalar function, as defined in 3.2.2 of the
10723// AAVFABI.
10724static std::tuple<unsigned, unsigned, bool>
10725getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10726 QualType RetType = FD->getReturnType().getCanonicalType();
10727
10728 ASTContext &C = FD->getASTContext();
10729
10730 bool OutputBecomesInput = false;
10731
10732 llvm::SmallVector<unsigned, 8> Sizes;
10733 if (!RetType->isVoidType()) {
10734 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10735 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10736 OutputBecomesInput = true;
10737 }
10738 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10739 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10740 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10741 }
10742
10743 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10744 // The LS of a function parameter / return value can only be a power
10745 // of 2, starting from 8 bits, up to 128.
10746 assert(llvm::all_of(Sizes,
10747 [](unsigned Size) {
10748 return Size == 8 || Size == 16 || Size == 32 ||
10749 Size == 64 || Size == 128;
10750 }) &&
10751 "Invalid size");
10752
10753 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10754 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10755 OutputBecomesInput);
10756}
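// Illustrative example (not part of the original source): for
// 'double f(short x, float y)' the lane sizes are {64, 16, 32}, so this
// returns NDS = 16 and WDS = 64.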
10757
10758// Function used to add the attribute. The parameter `VLEN` is
10759// templated to allow the use of "x" when targeting scalable functions
10760// for SVE.
10761template <typename T>
10762static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10763 char ISA, StringRef ParSeq,
10764 StringRef MangledName, bool OutputBecomesInput,
10765 llvm::Function *Fn) {
10766 SmallString<256> Buffer;
10767 llvm::raw_svector_ostream Out(Buffer);
10768 Out << Prefix << ISA << LMask << VLEN;
10769 if (OutputBecomesInput)
10770 Out << "v";
10771 Out << ParSeq << "_" << MangledName;
10772 Fn->addFnAttr(Out.str());
10773}
10774
10775// Helper function to generate the Advanced SIMD names depending on
10776// the value of the NDS when simdlen is not present.
10777static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10778 StringRef Prefix, char ISA,
10779 StringRef ParSeq, StringRef MangledName,
10780 bool OutputBecomesInput,
10781 llvm::Function *Fn) {
10782 switch (NDS) {
10783 case 8:
10784 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10785 OutputBecomesInput, Fn);
10786 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10787 OutputBecomesInput, Fn);
10788 break;
10789 case 16:
10790 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10791 OutputBecomesInput, Fn);
10792 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10793 OutputBecomesInput, Fn);
10794 break;
10795 case 32:
10796 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10797 OutputBecomesInput, Fn);
10798 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10799 OutputBecomesInput, Fn);
10800 break;
10801 case 64:
10802 case 128:
10803 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10804 OutputBecomesInput, Fn);
10805 break;
10806 default:
10807 llvm_unreachable("Scalar type is too wide.");
10808 }
10809}
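// Illustrative example (not part of the original source): with NDS = 16 the
// switch above emits a 4-lane (4 x 16 = 64-bit) and an 8-lane
// (8 x 16 = 128-bit) Advanced SIMD variant, e.g. "_ZGVnN4..." and
// "_ZGVnN8...".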
10810
10811/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10812static void emitAArch64DeclareSimdFunction(
10813 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10814 ArrayRef<ParamAttrTy> ParamAttrs,
10815 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10816 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10817
10818 // Get basic data for building the vector signature.
10819 const auto Data = getNDSWDS(FD, ParamAttrs);
10820 const unsigned NDS = std::get<0>(Data);
10821 const unsigned WDS = std::get<1>(Data);
10822 const bool OutputBecomesInput = std::get<2>(Data);
10823
10824 // Check the values provided via `simdlen` by the user.
10825 // 1. A `simdlen(1)` doesn't produce vector signatures,
10826 if (UserVLEN == 1) {
10827 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10829 "The clause simdlen(1) has no effect when targeting aarch64.");
10830 CGM.getDiags().Report(SLoc, DiagID);
10831 return;
10832 }
10833
10834 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10835 // Advanced SIMD output.
10836 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10837 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10838 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10839 "power of 2 when targeting Advanced SIMD.");
10840 CGM.getDiags().Report(SLoc, DiagID);
10841 return;
10842 }
10843
10844 // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
10845 // limits.
10846 if (ISA == 's' && UserVLEN != 0) {
10847 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10848 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10849 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10850 "lanes in the architectural constraints "
10851 "for SVE (min is 128-bit, max is "
10852 "2048-bit, by steps of 128-bit)");
10853 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10854 return;
10855 }
10856 }
10857
10858 // Sort out parameter sequence.
10859 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10860 StringRef Prefix = "_ZGV";
10861 // Generate simdlen from user input (if any).
10862 if (UserVLEN) {
10863 if (ISA == 's') {
10864 // SVE generates only a masked function.
10865 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10866 OutputBecomesInput, Fn);
10867 } else {
10868 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10869 // Advanced SIMD generates one or two functions, depending on
10870 // the `[not]inbranch` clause.
10871 switch (State) {
10872 case OMPDeclareSimdDeclAttr::BS_Undefined:
10873 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10874 OutputBecomesInput, Fn);
10875 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10876 OutputBecomesInput, Fn);
10877 break;
10878 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10879 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10880 OutputBecomesInput, Fn);
10881 break;
10882 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10883 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10884 OutputBecomesInput, Fn);
10885 break;
10886 }
10887 }
10888 } else {
10889 // If no user simdlen is provided, follow the AAVFABI rules for
10890 // generating the vector length.
10891 if (ISA == 's') {
10892 // SVE, section 3.4.1, item 1.
10893 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10894 OutputBecomesInput, Fn);
10895 } else {
10896 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10897 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10898 // two vector names depending on the use of the clause
10899 // `[not]inbranch`.
10900 switch (State) {
10901 case OMPDeclareSimdDeclAttr::BS_Undefined:
10902 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10903 OutputBecomesInput, Fn);
10904 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10905 OutputBecomesInput, Fn);
10906 break;
10907 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10908 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10909 OutputBecomesInput, Fn);
10910 break;
10911 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10912 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10913 OutputBecomesInput, Fn);
10914 break;
10915 }
10916 }
10917 }
10918}
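// Illustrative example (not part of the original source): without a user
// simdlen, a function with one vector parameter gets a single masked,
// scalable SVE variant such as "_ZGVsMxv_f", while for Advanced SIMD the
// fixed VLENs are derived from the NDS as shown above.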
10919
10920void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10921 llvm::Function *Fn) {
10922 ASTContext &C = CGM.getContext();
10923 FD = FD->getMostRecentDecl();
10924 while (FD) {
10925 // Map params to their positions in function decl.
10926 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10927 if (isa<CXXMethodDecl>(FD))
10928 ParamPositions.try_emplace(FD, 0);
10929 unsigned ParamPos = ParamPositions.size();
10930 for (const ParmVarDecl *P : FD->parameters()) {
10931 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10932 ++ParamPos;
10933 }
10934 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10935 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10936 // Mark uniform parameters.
10937 for (const Expr *E : Attr->uniforms()) {
10938 E = E->IgnoreParenImpCasts();
10939 unsigned Pos;
10940 if (isa<CXXThisExpr>(E)) {
10941 Pos = ParamPositions[FD];
10942 } else {
10943 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10944 ->getCanonicalDecl();
10945 auto It = ParamPositions.find(PVD);
10946 assert(It != ParamPositions.end() && "Function parameter not found");
10947 Pos = It->second;
10948 }
10949 ParamAttrs[Pos].Kind = Uniform;
10950 }
10951 // Get alignment info.
10952 auto *NI = Attr->alignments_begin();
10953 for (const Expr *E : Attr->aligneds()) {
10954 E = E->IgnoreParenImpCasts();
10955 unsigned Pos;
10956 QualType ParmTy;
10957 if (isa<CXXThisExpr>(E)) {
10958 Pos = ParamPositions[FD];
10959 ParmTy = E->getType();
10960 } else {
10961 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10962 ->getCanonicalDecl();
10963 auto It = ParamPositions.find(PVD);
10964 assert(It != ParamPositions.end() && "Function parameter not found");
10965 Pos = It->second;
10966 ParmTy = PVD->getType();
10967 }
10968 ParamAttrs[Pos].Alignment =
10969 (*NI)
10970 ? (*NI)->EvaluateKnownConstInt(C)
10971 : llvm::APSInt::getUnsigned(
10972 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10973 .getQuantity());
10974 ++NI;
10975 }
10976 // Mark linear parameters.
10977 auto *SI = Attr->steps_begin();
10978 auto *MI = Attr->modifiers_begin();
10979 for (const Expr *E : Attr->linears()) {
10980 E = E->IgnoreParenImpCasts();
10981 unsigned Pos;
10982 bool IsReferenceType = false;
10983 // Rescaling factor needed to compute the linear parameter
10984 // value in the mangled name.
10985 unsigned PtrRescalingFactor = 1;
10986 if (isa<CXXThisExpr>(E)) {
10987 Pos = ParamPositions[FD];
10988 auto *P = cast<PointerType>(E->getType());
10989 PtrRescalingFactor = CGM.getContext()
10990 .getTypeSizeInChars(P->getPointeeType())
10991 .getQuantity();
10992 } else {
10993 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10994 ->getCanonicalDecl();
10995 auto It = ParamPositions.find(PVD);
10996 assert(It != ParamPositions.end() && "Function parameter not found");
10997 Pos = It->second;
10998 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10999 PtrRescalingFactor = CGM.getContext()
11000 .getTypeSizeInChars(P->getPointeeType())
11001 .getQuantity();
11002 else if (PVD->getType()->isReferenceType()) {
11003 IsReferenceType = true;
11004 PtrRescalingFactor =
11005 CGM.getContext()
11006 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11007 .getQuantity();
11008 }
11009 }
11010 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11011 if (*MI == OMPC_LINEAR_ref)
11012 ParamAttr.Kind = LinearRef;
11013 else if (*MI == OMPC_LINEAR_uval)
11014 ParamAttr.Kind = LinearUVal;
11015 else if (IsReferenceType)
11016 ParamAttr.Kind = LinearVal;
11017 else
11018 ParamAttr.Kind = Linear;
11019 // Assuming a stride of 1, for `linear` without modifiers.
11020 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11021 if (*SI) {
11022 Expr::EvalResult Result;
11023 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11024 if (const auto *DRE =
11025 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11026 if (const auto *StridePVD =
11027 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11028 ParamAttr.HasVarStride = true;
11029 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11030 assert(It != ParamPositions.end() &&
11031 "Function parameter not found");
11032 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11033 }
11034 }
11035 } else {
11036 ParamAttr.StrideOrArg = Result.Val.getInt();
11037 }
11038 }
11039 // If we are using a linear clause on a pointer, we need to
11040 // rescale the value of linear_step with the byte size of the
11041 // pointee type.
11042 if (!ParamAttr.HasVarStride &&
11043 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11044 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11045 ++SI;
11046 ++MI;
11047 }
11048 llvm::APSInt VLENVal;
11049 SourceLocation ExprLoc;
11050 const Expr *VLENExpr = Attr->getSimdlen();
11051 if (VLENExpr) {
11052 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11053 ExprLoc = VLENExpr->getExprLoc();
11054 }
11055 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11056 if (CGM.getTriple().isX86()) {
11057 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11058 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11059 unsigned VLEN = VLENVal.getExtValue();
11060 StringRef MangledName = Fn->getName();
11061 if (CGM.getTarget().hasFeature("sve"))
11062 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11063 MangledName, 's', 128, Fn, ExprLoc);
11064 else if (CGM.getTarget().hasFeature("neon"))
11065 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11066 MangledName, 'n', 128, Fn, ExprLoc);
11067 }
11068 }
11069 FD = FD->getPreviousDecl();
11070 }
11071}
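// Illustrative example (not part of the original source):
//   #pragma omp declare simd simdlen(8) uniform(n)
//   float f(float *p, int n);
// marks 'n' Uniform and 'p' Vector, so on x86 the emitted attributes take
// the form "_ZGVbN8vu_f", "_ZGVbM8vu_f", "_ZGVcN8vu_f", and so on for each
// ISA/mask pair.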
11072
11073namespace {
11074/// Cleanup action for doacross support.
11075class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11076public:
11077 static const int DoacrossFinArgs = 2;
11078
11079private:
11080 llvm::FunctionCallee RTLFn;
11081 llvm::Value *Args[DoacrossFinArgs];
11082
11083public:
11084 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11085 ArrayRef<llvm::Value *> CallArgs)
11086 : RTLFn(RTLFn) {
11087 assert(CallArgs.size() == DoacrossFinArgs);
11088 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11089 }
11090 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11091 if (!CGF.HaveInsertPoint())
11092 return;
11093 CGF.EmitRuntimeCall(RTLFn, Args);
11094 }
11095};
11096} // namespace
11097
11098void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11099 const OMPLoopDirective &D,
11100 ArrayRef<Expr *> NumIterations) {
11101 if (!CGF.HaveInsertPoint())
11102 return;
11103
11104 ASTContext &C = CGM.getContext();
11105 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11106 RecordDecl *RD;
11107 if (KmpDimTy.isNull()) {
11108 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11109 // kmp_int64 lo; // lower
11110 // kmp_int64 up; // upper
11111 // kmp_int64 st; // stride
11112 // };
11113 RD = C.buildImplicitRecord("kmp_dim");
11114 RD->startDefinition();
11115 addFieldToRecordDecl(C, RD, Int64Ty);
11116 addFieldToRecordDecl(C, RD, Int64Ty);
11117 addFieldToRecordDecl(C, RD, Int64Ty);
11118 RD->completeDefinition();
11119 KmpDimTy = C.getRecordType(RD);
11120 } else {
11121 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11122 }
11123 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11124 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11125 ArraySizeModifier::Normal, 0);
11126
11127 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11128 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11129 enum { LowerFD = 0, UpperFD, StrideFD };
11130 // Fill dims with data.
11131 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11132 LValue DimsLVal = CGF.MakeAddrLValue(
11133 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11134 // dims.upper = num_iterations;
11135 LValue UpperLVal = CGF.EmitLValueForField(
11136 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11137 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11138 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11139 Int64Ty, NumIterations[I]->getExprLoc());
11140 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11141 // dims.stride = 1;
11142 LValue StrideLVal = CGF.EmitLValueForField(
11143 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11144 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11145 StrideLVal);
11146 }
11147
11148 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11149 // kmp_int32 num_dims, struct kmp_dim * dims);
11150 llvm::Value *Args[] = {
11151 emitUpdateLocation(CGF, D.getBeginLoc()),
11152 getThreadID(CGF, D.getBeginLoc()),
11153 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11154 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11155 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11156 CGM.VoidPtrTy)};
11157
11158 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11159 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11160 CGF.EmitRuntimeCall(RTLFn, Args);
11161 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11162 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11163 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11164 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11165 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11166 llvm::ArrayRef(FiniArgs));
11167}
11168
11169template <typename T>
11170static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11171 const T *C, llvm::Value *ULoc,
11172 llvm::Value *ThreadID) {
11173 QualType Int64Ty =
11174 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11175 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11176 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11177 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11178 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11179 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11180 const Expr *CounterVal = C->getLoopData(I);
11181 assert(CounterVal);
11182 llvm::Value *CntVal = CGF.EmitScalarConversion(
11183 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11184 CounterVal->getExprLoc());
11185 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11186 /*Volatile=*/false, Int64Ty);
11187 }
11188 llvm::Value *Args[] = {
11189 ULoc, ThreadID,
11190 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11191 llvm::FunctionCallee RTLFn;
11192 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11193 OMPDoacrossKind<T> ODK;
11194 if (ODK.isSource(C)) {
11195 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11196 OMPRTL___kmpc_doacross_post);
11197 } else {
11198 assert(ODK.isSink(C) && "Expect sink modifier.");
11199 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11200 OMPRTL___kmpc_doacross_wait);
11201 }
11202 CGF.EmitRuntimeCall(RTLFn, Args);
11203}
11204
11205void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11206 const OMPDependClause *C) {
11207 return EmitDoacrossOrdered<OMPDependClause>(
11208 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11209 getThreadID(CGF, C->getBeginLoc()));
11210}
11211
11212void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11213 const OMPDoacrossClause *C) {
11214 return EmitDoacrossOrdered<OMPDoacrossClause>(
11215 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11216 getThreadID(CGF, C->getBeginLoc()));
11217}
11218
11219void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11220 llvm::FunctionCallee Callee,
11221 ArrayRef<llvm::Value *> Args) const {
11222 assert(Loc.isValid() && "Outlined function call location must be valid.");
11223 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11224
11225 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11226 if (Fn->doesNotThrow()) {
11227 CGF.EmitNounwindRuntimeCall(Fn, Args);
11228 return;
11229 }
11230 }
11231 CGF.EmitRuntimeCall(Callee, Args);
11232}
11233
11234void CGOpenMPRuntime::emitOutlinedFunctionCall(
11235 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11236 ArrayRef<llvm::Value *> Args) const {
11237 emitCall(CGF, Loc, OutlinedFn, Args);
11238}
11239
11240void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11241 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11242 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11243 HasEmittedDeclareTargetRegion = true;
11244}
11245
11246Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11247 const VarDecl *NativeParam,
11248 const VarDecl *TargetParam) const {
11249 return CGF.GetAddrOfLocalVar(NativeParam);
11250}
11251
11252/// Return allocator value from expression, or return a null allocator (default
11253/// when no allocator specified).
11254static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11255 const Expr *Allocator) {
11256 llvm::Value *AllocVal;
11257 if (Allocator) {
11258 AllocVal = CGF.EmitScalarExpr(Allocator);
11259 // According to the standard, the original allocator type is an enum
11260 // (integer). Convert to pointer type, if required.
11261 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11262 CGF.getContext().VoidPtrTy,
11263 Allocator->getExprLoc());
11264 } else {
11265 // If no allocator specified, it defaults to the null allocator.
11266 AllocVal = llvm::Constant::getNullValue(
11267 CGF.ConvertTypeForMem(CGF.getContext().VoidPtrTy));
11268 }
11269 return AllocVal;
11270}
11271
11272/// Return the alignment from an allocate directive if present.
11273static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11274 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11275
11276 if (!AllocateAlignment)
11277 return nullptr;
11278
11279 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11280}
11281
11282Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11283 const VarDecl *VD) {
11284 if (!VD)
11285 return Address::invalid();
11286 Address UntiedAddr = Address::invalid();
11287 Address UntiedRealAddr = Address::invalid();
11288 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11289 if (It != FunctionToUntiedTaskStackMap.end()) {
11290 const UntiedLocalVarsAddressesMap &UntiedData =
11291 UntiedLocalVarsStack[It->second];
11292 auto I = UntiedData.find(VD);
11293 if (I != UntiedData.end()) {
11294 UntiedAddr = I->second.first;
11295 UntiedRealAddr = I->second.second;
11296 }
11297 }
11298 const VarDecl *CVD = VD->getCanonicalDecl();
11299 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11300 // Use the default allocation.
11301 if (!isAllocatableDecl(VD))
11302 return UntiedAddr;
11303 llvm::Value *Size;
11304 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11305 if (CVD->getType()->isVariablyModifiedType()) {
11306 Size = CGF.getTypeSize(CVD->getType());
11307 // Align the size: ((size + align - 1) / align) * align
11308 Size = CGF.Builder.CreateNUWAdd(
11309 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11310 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11311 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11312 } else {
11313 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11314 Size = CGM.getSize(Sz.alignTo(Align));
11315 }
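// E.g. (illustrative): Size = 10 bytes with Align = 8 rounds up to
// ((10 + 8 - 1) / 8) * 8 = 16 bytes.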
11316 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11317 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11318 const Expr *Allocator = AA->getAllocator();
11319 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11320 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11321 SmallVector<llvm::Value *, 4> Args;
11322 Args.push_back(ThreadID);
11323 if (Alignment)
11324 Args.push_back(Alignment);
11325 Args.push_back(Size);
11326 Args.push_back(AllocVal);
11327 llvm::omp::RuntimeFunction FnID =
11328 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11329 llvm::Value *Addr = CGF.EmitRuntimeCall(
11330 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11331 getName({CVD->getName(), ".void.addr"}));
11332 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11333 CGM.getModule(), OMPRTL___kmpc_free);
11334 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11335 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11336 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11337 if (UntiedAddr.isValid())
11338 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11339
11340 // Cleanup action for allocate support.
11341 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11342 llvm::FunctionCallee RTLFn;
11343 SourceLocation::UIntTy LocEncoding;
11344 Address Addr;
11345 const Expr *AllocExpr;
11346
11347 public:
11348 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11349 SourceLocation::UIntTy LocEncoding, Address Addr,
11350 const Expr *AllocExpr)
11351 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11352 AllocExpr(AllocExpr) {}
11353 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11354 if (!CGF.HaveInsertPoint())
11355 return;
11356 llvm::Value *Args[3];
11357 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11358 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11359 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11360 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11361 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11362 Args[2] = AllocVal;
11363 CGF.EmitRuntimeCall(RTLFn, Args);
11364 }
11365 };
11366 Address VDAddr =
11367 UntiedRealAddr.isValid()
11368 ? UntiedRealAddr
11369 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11370 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11371 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11372 VDAddr, Allocator);
11373 if (UntiedRealAddr.isValid())
11374 if (auto *Region =
11375 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11376 Region->emitUntiedSwitch(CGF);
11377 return VDAddr;
11378 }
11379 return UntiedAddr;
11380}
11381
11382bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11383 const VarDecl *VD) const {
11384 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11385 if (It == FunctionToUntiedTaskStackMap.end())
11386 return false;
11387 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11388}
11389
11390CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11391 CodeGenModule &CGM, const OMPLoopDirective &S)
11392 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11393 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11394 if (!NeedToPush)
11395 return;
11396 NontemporalDeclsSet &DS =
11397 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11398 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11399 for (const Stmt *Ref : C->private_refs()) {
11400 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11401 const ValueDecl *VD;
11402 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11403 VD = DRE->getDecl();
11404 } else {
11405 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11406 assert((ME->isImplicitCXXThis() ||
11407 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11408 "Expected member of current class.");
11409 VD = ME->getMemberDecl();
11410 }
11411 DS.insert(VD);
11412 }
11413 }
11414}
11415
11416CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11417 if (!NeedToPush)
11418 return;
11419 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11420}
11421
11422CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11423 CodeGenFunction &CGF,
11424 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11425 std::pair<Address, Address>> &LocalVars)
11426 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11427 if (!NeedToPush)
11428 return;
11429 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11430 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11431 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11432}
11433
11434CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11435 if (!NeedToPush)
11436 return;
11437 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11438}
11439
11440bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11441 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11442
11443 return llvm::any_of(
11444 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11445 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11446}
11447
11448void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11449 const OMPExecutableDirective &S,
11450 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11451 const {
11452 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11453 // Vars in target/task regions must be excluded completely.
11454 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11455 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11456 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11457 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11458 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11459 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11460 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11461 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11462 }
11463 }
11464 // Exclude vars in private clauses.
11465 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11466 for (const Expr *Ref : C->varlists()) {
11467 if (!Ref->getType()->isScalarType())
11468 continue;
11469 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11470 if (!DRE)
11471 continue;
11472 NeedToCheckForLPCs.insert(DRE->getDecl());
11473 }
11474 }
11475 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11476 for (const Expr *Ref : C->varlists()) {
11477 if (!Ref->getType()->isScalarType())
11478 continue;
11479 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11480 if (!DRE)
11481 continue;
11482 NeedToCheckForLPCs.insert(DRE->getDecl());
11483 }
11484 }
11485 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11486 for (const Expr *Ref : C->varlists()) {
11487 if (!Ref->getType()->isScalarType())
11488 continue;
11489 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11490 if (!DRE)
11491 continue;
11492 NeedToCheckForLPCs.insert(DRE->getDecl());
11493 }
11494 }
11495 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11496 for (const Expr *Ref : C->varlists()) {
11497 if (!Ref->getType()->isScalarType())
11498 continue;
11499 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11500 if (!DRE)
11501 continue;
11502 NeedToCheckForLPCs.insert(DRE->getDecl());
11503 }
11504 }
11505 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11506 for (const Expr *Ref : C->varlists()) {
11507 if (!Ref->getType()->isScalarType())
11508 continue;
11509 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11510 if (!DRE)
11511 continue;
11512 NeedToCheckForLPCs.insert(DRE->getDecl());
11513 }
11514 }
11515 for (const Decl *VD : NeedToCheckForLPCs) {
11516 for (const LastprivateConditionalData &Data :
11517 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11518 if (Data.DeclToUniqueName.count(VD) > 0) {
11519 if (!Data.Disabled)
11520 NeedToAddForLPCsAsDisabled.insert(VD);
11521 break;
11522 }
11523 }
11524 }
11525}
11526
11527CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11528 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11529 : CGM(CGF.CGM),
11530 Action((CGM.getLangOpts().OpenMP >= 50 &&
11531 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11532 [](const OMPLastprivateClause *C) {
11533 return C->getKind() ==
11534 OMPC_LASTPRIVATE_conditional;
11535 }))
11536 ? ActionToDo::PushAsLastprivateConditional
11537 : ActionToDo::DoNotPush) {
11538 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11539 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11540 return;
11541 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11542 "Expected a push action.");
11543 LastprivateConditionalData &Data =
11544 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11545 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11546 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11547 continue;
11548
11549 for (const Expr *Ref : C->varlists()) {
11550 Data.DeclToUniqueName.insert(std::make_pair(
11551 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11552 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11553 }
11554 }
11555 Data.IVLVal = IVLVal;
11556 Data.Fn = CGF.CurFn;
11557}
11558
11559CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11560 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11561 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11562 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11563 if (CGM.getLangOpts().OpenMP < 50)
11564 return;
11565 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11566 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11567 if (!NeedToAddForLPCsAsDisabled.empty()) {
11568 Action = ActionToDo::DisableLastprivateConditional;
11569 LastprivateConditionalData &Data =
11570 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11571 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11572 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11573 Data.Fn = CGF.CurFn;
11574 Data.Disabled = true;
11575 }
11576}
11577
11578CGOpenMPRuntime::LastprivateConditionalRAII
11579CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11580 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11581 return LastprivateConditionalRAII(CGF, S);
11582}
11583
11584CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11585 if (CGM.getLangOpts().OpenMP < 50)
11586 return;
11587 if (Action == ActionToDo::DisableLastprivateConditional) {
11588 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11589 "Expected list of disabled private vars.");
11590 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11591 }
11592 if (Action == ActionToDo::PushAsLastprivateConditional) {
11593 assert(
11595 "Expected list of lastprivate conditional vars.");
11597 }
11598}
11599
11600Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11601 const VarDecl *VD) {
11602 ASTContext &C = CGM.getContext();
11603 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11604 if (I == LastprivateConditionalToTypes.end())
11605 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11606 QualType NewType;
11607 const FieldDecl *VDField;
11608 const FieldDecl *FiredField;
11609 LValue BaseLVal;
11610 auto VI = I->getSecond().find(VD);
11611 if (VI == I->getSecond().end()) {
11612 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11613 RD->startDefinition();
11614 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11615 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11616 RD->completeDefinition();
11617 NewType = C.getRecordType(RD);
11618 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11619 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11620 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11621 } else {
11622 NewType = std::get<0>(VI->getSecond());
11623 VDField = std::get<1>(VI->getSecond());
11624 FiredField = std::get<2>(VI->getSecond());
11625 BaseLVal = std::get<3>(VI->getSecond());
11626 }
11627 LValue FiredLVal =
11628 CGF.EmitLValueForField(BaseLVal, FiredField);
11629 CGF.EmitStoreOfScalar(
11630 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11631 FiredLVal);
11632 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11633}
11634
11635namespace {
11636/// Checks if the lastprivate conditional variable is referenced in LHS.
11637class LastprivateConditionalRefChecker final
11638 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11639 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11640 const Expr *FoundE = nullptr;
11641 const Decl *FoundD = nullptr;
11642 StringRef UniqueDeclName;
11643 LValue IVLVal;
11644 llvm::Function *FoundFn = nullptr;
11645 SourceLocation Loc;
11646
11647public:
11648 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11649 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11650 llvm::reverse(LPM)) {
11651 auto It = D.DeclToUniqueName.find(E->getDecl());
11652 if (It == D.DeclToUniqueName.end())
11653 continue;
11654 if (D.Disabled)
11655 return false;
11656 FoundE = E;
11657 FoundD = E->getDecl()->getCanonicalDecl();
11658 UniqueDeclName = It->second;
11659 IVLVal = D.IVLVal;
11660 FoundFn = D.Fn;
11661 break;
11662 }
11663 return FoundE == E;
11664 }
11665 bool VisitMemberExpr(const MemberExpr *E) {
11666 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11667 return false;
11668 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11669 llvm::reverse(LPM)) {
11670 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11671 if (It == D.DeclToUniqueName.end())
11672 continue;
11673 if (D.Disabled)
11674 return false;
11675 FoundE = E;
11676 FoundD = E->getMemberDecl()->getCanonicalDecl();
11677 UniqueDeclName = It->second;
11678 IVLVal = D.IVLVal;
11679 FoundFn = D.Fn;
11680 break;
11681 }
11682 return FoundE == E;
11683 }
11684 bool VisitStmt(const Stmt *S) {
11685 for (const Stmt *Child : S->children()) {
11686 if (!Child)
11687 continue;
11688 if (const auto *E = dyn_cast<Expr>(Child))
11689 if (!E->isGLValue())
11690 continue;
11691 if (Visit(Child))
11692 return true;
11693 }
11694 return false;
11695 }
11696 explicit LastprivateConditionalRefChecker(
11697 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11698 : LPM(LPM) {}
11699 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11700 getFoundData() const {
11701 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11702 }
11703};
11704} // namespace
11705
11706void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11707 LValue IVLVal,
11708 StringRef UniqueDeclName,
11709 LValue LVal,
11710 SourceLocation Loc) {
11711 // Last updated loop counter for the lastprivate conditional var.
11712 // int<xx> last_iv = 0;
11713 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11714 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11715 LLIVTy, getName({UniqueDeclName, "iv"}));
11716 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11717 IVLVal.getAlignment().getAsAlign());
11718 LValue LastIVLVal =
11719 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11720
11721 // Last value of the lastprivate conditional.
11722 // decltype(priv_a) last_a;
11723 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11724 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11725 cast<llvm::GlobalVariable>(Last)->setAlignment(
11726 LVal.getAlignment().getAsAlign());
11727 LValue LastLVal =
11728 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11729
11730 // Global loop counter. Required to handle inner parallel-for regions.
11731 // iv
11732 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11733
11734 // #pragma omp critical(a)
11735 // if (last_iv <= iv) {
11736 // last_iv = iv;
11737 // last_a = priv_a;
11738 // }
11739 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11740 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11741 Action.Enter(CGF);
11742 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11743 // (last_iv <= iv) ? Check if the variable is updated and store new
11744 // value in global var.
11745 llvm::Value *CmpRes;
11746 if (IVLVal.getType()->isSignedIntegerType()) {
11747 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11748 } else {
11749 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11750 "Loop iteration variable must be integer.");
11751 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11752 }
11753 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11754 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11755 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11756 // {
11757 CGF.EmitBlock(ThenBB);
11758
11759 // last_iv = iv;
11760 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11761
11762 // last_a = priv_a;
11763 switch (CGF.getEvaluationKind(LVal.getType())) {
11764 case TEK_Scalar: {
11765 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11766 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11767 break;
11768 }
11769 case TEK_Complex: {
11770 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11771 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11772 break;
11773 }
11774 case TEK_Aggregate:
11775 llvm_unreachable(
11776 "Aggregates are not supported in lastprivate conditional.");
11777 }
11778 // }
11779 CGF.EmitBranch(ExitBB);
11780 // There is no need to emit line number for unconditional branch.
11781 (void)ApplyDebugLocation::CreateEmpty(CGF);
11782 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11783 };
11784
11785 if (CGM.getLangOpts().OpenMPSimd) {
11786 // Do not emit as a critical region as no parallel region could be emitted.
11787 RegionCodeGenTy ThenRCG(CodeGen);
11788 ThenRCG(CGF);
11789 } else {
11790 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11791 }
11792}
11793
11794void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11795 const Expr *LHS) {
11796 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11797 return;
11798 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11799 if (!Checker.Visit(LHS))
11800 return;
11801 const Expr *FoundE;
11802 const Decl *FoundD;
11803 StringRef UniqueDeclName;
11804 LValue IVLVal;
11805 llvm::Function *FoundFn;
11806 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11807 Checker.getFoundData();
11808 if (FoundFn != CGF.CurFn) {
11809 // Special codegen for inner parallel regions.
11810 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11811 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11812 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11813 "Lastprivate conditional is not found in outer region.");
11814 QualType StructTy = std::get<0>(It->getSecond());
11815 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11816 LValue PrivLVal = CGF.EmitLValue(FoundE);
11817 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11818 PrivLVal.getAddress(),
11819 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11820 CGF.ConvertTypeForMem(StructTy));
11821 LValue BaseLVal =
11822 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11823 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11824 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11825 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11826 FiredLVal, llvm::AtomicOrdering::Unordered,
11827 /*IsVolatile=*/true, /*isInit=*/false);
11828 return;
11829 }
11830
11831 // Private address of the lastprivate conditional in the current context.
11832 // priv_a
11833 LValue LVal = CGF.EmitLValue(FoundE);
11834 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11835 FoundE->getExprLoc());
11836}
11837
11838void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11839 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11840 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11841 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11842 return;
11843 auto Range = llvm::reverse(LastprivateConditionalStack);
11844 auto It = llvm::find_if(
11845 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11846 if (It == Range.end() || It->Fn != CGF.CurFn)
11847 return;
11848 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11849 assert(LPCI != LastprivateConditionalToTypes.end() &&
11850 "Lastprivates must be registered already.");
11851 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11852 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11853 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11854 for (const auto &Pair : It->DeclToUniqueName) {
11855 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11856 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11857 continue;
11858 auto I = LPCI->getSecond().find(Pair.first);
11859 assert(I != LPCI->getSecond().end() &&
11860 "Lastprivate must be rehistered already.");
11861 // bool Cmp = priv_a.Fired != 0;
11862 LValue BaseLVal = std::get<3>(I->getSecond());
11863 LValue FiredLVal =
11864 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11865 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11866 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11867 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11868 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11869 // if (Cmp) {
11870 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11871 CGF.EmitBlock(ThenBB);
11872 Address Addr = CGF.GetAddrOfLocalVar(VD);
11873 LValue LVal;
11874 if (VD->getType()->isReferenceType())
11875 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11876 AlignmentSource::Decl);
11877 else
11878 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11879 AlignmentSource::Decl);
11880 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11881 D.getBeginLoc());
11882 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11883 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11884 // }
11885 }
11886}
11887
11888void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11889 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11890 SourceLocation Loc) {
11891 if (CGF.getLangOpts().OpenMP < 50)
11892 return;
11893 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11894 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11895 "Unknown lastprivate conditional variable.");
11896 StringRef UniqueName = It->second;
11897 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11898 // The variable was not updated in the region - exit.
11899 if (!GV)
11900 return;
11901 LValue LPLVal = CGF.MakeRawAddrLValue(
11902 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11903 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11904 CGF.EmitStoreOfScalar(Res, PrivLVal);
11905}
11906
11907llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11908 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11909 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11910 const RegionCodeGenTy &CodeGen) {
11911 llvm_unreachable("Not supported in SIMD-only mode");
11912}
11913
11914llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11915 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11916 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11917 const RegionCodeGenTy &CodeGen) {
11918 llvm_unreachable("Not supported in SIMD-only mode");
11919}
11920
11921llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11922 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11923 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11924 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11925 bool Tied, unsigned &NumberOfParts) {
11926 llvm_unreachable("Not supported in SIMD-only mode");
11927}
11928
11929void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11930 SourceLocation Loc,
11931 llvm::Function *OutlinedFn,
11932 ArrayRef<llvm::Value *> CapturedVars,
11933 const Expr *IfCond,
11934 llvm::Value *NumThreads) {
11935 llvm_unreachable("Not supported in SIMD-only mode");
11936}
11937
11938void CGOpenMPSIMDRuntime::emitCriticalRegion(
11939 CodeGenFunction &CGF, StringRef CriticalName,
11940 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11941 const Expr *Hint) {
11942 llvm_unreachable("Not supported in SIMD-only mode");
11943}
11944
11945void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11946 const RegionCodeGenTy &MasterOpGen,
11947 SourceLocation Loc) {
11948 llvm_unreachable("Not supported in SIMD-only mode");
11949}
11950
11951void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11952 const RegionCodeGenTy &MasterOpGen,
11953 SourceLocation Loc,
11954 const Expr *Filter) {
11955 llvm_unreachable("Not supported in SIMD-only mode");
11956}
11957
11958void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11959 SourceLocation Loc) {
11960 llvm_unreachable("Not supported in SIMD-only mode");
11961}
11962
11963void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11964 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11965 SourceLocation Loc) {
11966 llvm_unreachable("Not supported in SIMD-only mode");
11967}
11968
11969void CGOpenMPSIMDRuntime::emitSingleRegion(
11970 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11971 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11972 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11973 ArrayRef<const Expr *> AssignmentOps) {
11974 llvm_unreachable("Not supported in SIMD-only mode");
11975}
11976
11977void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11978 const RegionCodeGenTy &OrderedOpGen,
11979 SourceLocation Loc,
11980 bool IsThreads) {
11981 llvm_unreachable("Not supported in SIMD-only mode");
11982}
11983
11984void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11985 SourceLocation Loc,
11986 OpenMPDirectiveKind Kind,
11987 bool EmitChecks,
11988 bool ForceSimpleCall) {
11989 llvm_unreachable("Not supported in SIMD-only mode");
11990}
11991
11992void CGOpenMPSIMDRuntime::emitForDispatchInit(
11993 CodeGenFunction &CGF, SourceLocation Loc,
11994 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11995 bool Ordered, const DispatchRTInput &DispatchValues) {
11996 llvm_unreachable("Not supported in SIMD-only mode");
11997}
11998
11999void CGOpenMPSIMDRuntime::emitForStaticInit(
12000 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12001 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12002 llvm_unreachable("Not supported in SIMD-only mode");
12003}
12004
12005void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12006 CodeGenFunction &CGF, SourceLocation Loc,
12007 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12008 llvm_unreachable("Not supported in SIMD-only mode");
12009}
12010
12011void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12012 SourceLocation Loc,
12013 unsigned IVSize,
12014 bool IVSigned) {
12015 llvm_unreachable("Not supported in SIMD-only mode");
12016}
12017
12018void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12019 SourceLocation Loc,
12020 OpenMPDirectiveKind DKind) {
12021 llvm_unreachable("Not supported in SIMD-only mode");
12022}
12023
12024llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12025 SourceLocation Loc,
12026 unsigned IVSize, bool IVSigned,
12027 Address IL, Address LB,
12028 Address UB, Address ST) {
12029 llvm_unreachable("Not supported in SIMD-only mode");
12030}
12031
12032void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12033 llvm::Value *NumThreads,
12034 SourceLocation Loc) {
12035 llvm_unreachable("Not supported in SIMD-only mode");
12036}
12037
12038void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12039 ProcBindKind ProcBind,
12040 SourceLocation Loc) {
12041 llvm_unreachable("Not supported in SIMD-only mode");
12042}
12043
12044Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12045 const VarDecl *VD,
12046 Address VDAddr,
12047 SourceLocation Loc) {
12048 llvm_unreachable("Not supported in SIMD-only mode");
12049}
12050
12052 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12053 CodeGenFunction *CGF) {
12054 llvm_unreachable("Not supported in SIMD-only mode");
12055}
12056
12058 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12059 llvm_unreachable("Not supported in SIMD-only mode");
12060}
12061
12065 llvm::AtomicOrdering AO) {
12066 llvm_unreachable("Not supported in SIMD-only mode");
12067}
12068
12070 const OMPExecutableDirective &D,
12071 llvm::Function *TaskFunction,
12072 QualType SharedsTy, Address Shareds,
12073 const Expr *IfCond,
12074 const OMPTaskDataTy &Data) {
12075 llvm_unreachable("Not supported in SIMD-only mode");
12076}
12077
12080 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12081 const Expr *IfCond, const OMPTaskDataTy &Data) {
12082 llvm_unreachable("Not supported in SIMD-only mode");
12083}
12084
12085void CGOpenMPSIMDRuntime::emitReduction(
12086 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12087 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12088 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12089 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12090 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12091 ReductionOps, Options);
12092}
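// Unlike the stubs around it, this override is reachable in SIMD-only mode:
// a simd reduction needs no runtime library calls, so it asserts that only a
// simple reduction is requested and delegates to the base class, which emits
// the combination inline. A minimal construct that would exercise this path
// (illustrative; 'a' and 'n' are hypothetical):
//
//   float sum = 0.0f;
//   #pragma omp simd reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += a[i];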
12093
12094llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12095 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12096 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12097 llvm_unreachable("Not supported in SIMD-only mode");
12098}
12099
12100void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12101 SourceLocation Loc,
12102 bool IsWorksharingReduction) {
12103 llvm_unreachable("Not supported in SIMD-only mode");
12104}
12105
12106void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12107 SourceLocation Loc,
12108 ReductionCodeGen &RCG,
12109 unsigned N) {
12110 llvm_unreachable("Not supported in SIMD-only mode");
12111}
12112
12113Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12114 SourceLocation Loc,
12115 llvm::Value *ReductionsPtr,
12116 LValue SharedLVal) {
12117 llvm_unreachable("Not supported in SIMD-only mode");
12118}
12119
12120void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12121 SourceLocation Loc,
12122 const OMPTaskDataTy &Data) {
12123 llvm_unreachable("Not supported in SIMD-only mode");
12124}
12125
12126void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12127 CodeGenFunction &CGF, SourceLocation Loc,
12128 OpenMPDirectiveKind CancelRegion) {
12129 llvm_unreachable("Not supported in SIMD-only mode");
12130}
12131
12132void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12133 SourceLocation Loc, const Expr *IfCond,
12134 OpenMPDirectiveKind CancelRegion) {
12135 llvm_unreachable("Not supported in SIMD-only mode");
12136}
12137
12138void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12139 const OMPExecutableDirective &D, StringRef ParentName,
12140 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12141 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12142 llvm_unreachable("Not supported in SIMD-only mode");
12143}
12144
12145void CGOpenMPSIMDRuntime::emitTargetCall(
12146 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12147 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12148 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12149 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12150 const OMPLoopDirective &D)>
12151 SizeEmitter) {
12152 llvm_unreachable("Not supported in SIMD-only mode");
12153}
12154
12155bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12156 llvm_unreachable("Not supported in SIMD-only mode");
12157}
12158
12159bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12160 llvm_unreachable("Not supported in SIMD-only mode");
12161}
12162
12163bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12164 return false;
12165}
12166
12167void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12168 const OMPExecutableDirective &D,
12169 SourceLocation Loc,
12170 llvm::Function *OutlinedFn,
12171 ArrayRef<llvm::Value *> CapturedVars) {
12172 llvm_unreachable("Not supported in SIMD-only mode");
12173}
12174
12175void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12176 const Expr *NumTeams,
12177 const Expr *ThreadLimit,
12178 SourceLocation Loc) {
12179 llvm_unreachable("Not supported in SIMD-only mode");
12180}
12181
12182void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12183 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12184 const Expr *Device, const RegionCodeGenTy &CodeGen,
12185 CGOpenMPRuntime::TargetDataInfo &Info) {
12186 llvm_unreachable("Not supported in SIMD-only mode");
12187}
12188
12189void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12190 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12191 const Expr *Device) {
12192 llvm_unreachable("Not supported in SIMD-only mode");
12193}
12194
12195void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12196 const OMPLoopDirective &D,
12197 ArrayRef<Expr *> NumIterations) {
12198 llvm_unreachable("Not supported in SIMD-only mode");
12199}
12200
12201void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12202 const OMPDependClause *C) {
12203 llvm_unreachable("Not supported in SIMD-only mode");
12204}
12205
12206void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12207 const OMPDoacrossClause *C) {
12208 llvm_unreachable("Not supported in SIMD-only mode");
12209}
12210
12211const VarDecl *
12212CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12213 const VarDecl *NativeParam) const {
12214 llvm_unreachable("Not supported in SIMD-only mode");
12215}
12216
12217Address
12218CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12219 const VarDecl *NativeParam,
12220 const VarDecl *TargetParam) const {
12221 llvm_unreachable("Not supported in SIMD-only mode");
12222}
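// How this class is selected, as a rough sketch (assuming the dispatch in
// CodeGenModule::createOpenMPRuntime() is as in mainline clang):
//
//   if (LangOpts.OpenMPSimd)
//     OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this));
//   else
//     OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
//
// Compiling with 'clang -O2 -fopenmp-simd' therefore honors '#pragma omp
// simd' without linking libomp, while constructs that require the runtime
// are not code-generated, which is what keeps the stubs above unreachable.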
#define V(N, I)
Definition: ASTContext.h:3285
StringRef P
#define SM(sm)
Definition: Cuda.cpp:83
Provides LLVM's BitmaskEnum facility to enumeration types declared in namespace clang.
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Defines the clang::FileManager interface and associated types.
int Priority
Definition: Format.cpp:2979
#define X(type, name)
Definition: Value.h:143
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
SourceRange Range
Definition: SemaObjC.cpp:754
SourceLocation Loc
Definition: SemaObjC.cpp:755
Defines the SourceManager interface.
const char * Data
This file defines OpenMP AST classes for executable directives and clauses.
SourceLocation Begin
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
SourceManager & getSourceManager()
Definition: ASTContext.h:705
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2768
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2591
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1118
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:775
CanQualType BoolTy
Definition: ASTContext.h:1092
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1091
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2771
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:757
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:210
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:5068
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3518
Attr - This represents one attribute.
Definition: Attr.h:42
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2535
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2799
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2060
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2186
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2210
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:619
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1022
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1641
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:613
base_class_range vbases()
Definition: DeclCXX.h:636
capture_const_range captures() const
Definition: DeclCXX.h:1101
ctor_range ctors() const
Definition: DeclCXX.h:681
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:1975
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:349
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3770
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3804
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1305
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3810
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3798
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3801
This captures a statement into a function.
Definition: Stmt.h:3757
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:3908
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:3878
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3861
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1431
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:3903
capture_range captures()
Definition: Stmt.h:3895
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:111
static Address invalid()
Definition: Address.h:153
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:220
CharUnits getAlignment() const
Definition: Address.h:166
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:184
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:226
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:241
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:234
bool isValid() const
Definition: Address.h:154
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:176
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:864
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:871
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:881
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:156
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::Value * CreateIsNull(Address Addr, const Twine &Name="")
Definition: CGBuilder.h:355
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:203
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:241
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:364
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:278
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:113
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::FunctionType * Kmpc_MicroTy
The type for a microtask which gets passed to __kmpc_fork_call().
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of weather a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of weather a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *BasePtr, llvm::Value *Ptr, llvm::Value *Size, llvm::Value *MapType, llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit)
Emit the array initialization or deletion portion for user-defined mapper code generation.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits Callee function call with arguments Args with location Loc.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of weather a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outilined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits a masked region.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the finalization call for a reduction clause with the task modifier.
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of a task reduction clause.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
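IfCond selects between the forked path (__kmpc_fork_call of OutlinedFn) and a serialized call of the same outlined function; NumThreads, when present, is pushed to the runtime first. Illustrative source:

#include <omp.h>
#include <cstdio>
// Build with: clang++ -fopenmp parallel_if.cpp (file name illustrative)
int main(int argc, char **) {
  // With argc == 1 the region runs serialized on the calling thread.
  #pragma omp parallel if(argc > 1) num_threads(4)
  std::printf("hello from thread %d\n", omp_get_thread_num());
  return 0;
}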
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit the outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
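Illustrative source for the clauses this pushes to the runtime before the teams region is entered:

// Build with: clang++ -fopenmp teams.cpp (host teams, OpenMP 5.x; file name illustrative)
int main() {
  #pragma omp teams num_teams(4) thread_limit(64)
  {
    // at most 4 teams, each limited to 64 threads
  }
  return 0;
}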
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of the outlined function if this is required for the target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
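Illustrative source for a target region: if no device image is available (or the 'if' clause evaluates to false) the emitted code falls back to the host version of the outlined function:

#include <cstdio>
// Build with: clang++ -fopenmp -fopenmp-targets=<triple> target.cpp (file name illustrative)
int main() {
  int x = 0;
  #pragma omp target map(tofrom : x) device(0)
  x = 7;
  std::printf("x = %d\n", x); // 7, whether offloaded or run on the host
  return 0;
}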
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
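Doacross loops declare cross-iteration dependences; this initialization sets up the runtime bookkeeping (__kmpc_doacross_init), and the sink/source points map to the runtime's doacross wait/post calls. Illustrative source:

#include <cstdio>
// Build with: clang++ -fopenmp doacross.cpp (file name illustrative)
int main() {
  int a[100];
  a[0] = 0;
  #pragma omp parallel for ordered(1)
  for (int i = 1; i < 100; ++i) {
    #pragma omp ordered depend(sink : i - 1)
    a[i] = a[i - 1] + 1; // waits until iteration i-1 has posted
    #pragma omp ordered depend(source)
  }
  std::printf("a[99] = %d\n", a[99]); // 99
  return 0;
}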
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
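A taskloop packages slices of the iteration space as explicit tasks (TaskFunction) and hands the bounds, stride, and grainsize/num_tasks to __kmpc_taskloop. Illustrative source:

#include <cstdio>
// Build with: clang++ -fopenmp taskloop.cpp (file name illustrative)
int main() {
  int a[100];
  #pragma omp parallel
  #pragma omp single
  #pragma omp taskloop grainsize(16)
  for (int i = 0; i < 100; ++i)
    a[i] = i * i; // each generated task runs roughly 16 iterations
  std::printf("a[99] = %d\n", a[99]);
  return 0;
}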
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
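A sketch of how these accessors combine inside clang's codegen; it assumes the private lib/CodeGen headers (as included by this file) plus a CodeGenModule and RecordDecl already in scope, and the helper name is hypothetical:

#include "CGRecordLayout.h"   // private lib/CodeGen header
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: print the AST-field -> LLVM-element mapping of a record.
static void dumpFieldMapping(clang::CodeGen::CodeGenModule &CGM,
                             const clang::RecordDecl *RD) {
  const clang::CodeGen::CGRecordLayout &RL =
      CGM.getTypes().getCGRecordLayout(RD);
  for (const clang::FieldDecl *FD : RD->fields())
    llvm::errs() << FD->getName() << " -> element " << RL.getLLVMFieldNo(FD)
                 << "\n";
}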
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CGCapturedStmtInfo * CapturedStmtInfo
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
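These helpers are typically combined in a fixed emit-if pattern; a sketch assuming a CodeGenFunction &CGF and a const Expr *Cond already in scope (block names are illustrative):

// Emit: if (Cond) { <guarded code> }
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
llvm::BasicBlock *EndBB = CGF.createBasicBlock("end");
CGF.EmitBranchOnBoolExpr(Cond, ThenBB, EndBB, /*TrueCount=*/0);
CGF.EmitBlock(ThenBB);
// ... emit the guarded code here ...
CGF.EmitBranch(EndBB);
CGF.EmitBlock(EndBB);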
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
llvm::Type * ConvertTypeForMem(QualType T)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
JumpDest ReturnBlock
ReturnBlock - Unified return block.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
void EmitAutoVarCleanups(const AutoVarEmission &emission)
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
CodeGenTypes & getTypes() const
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
LValue EmitStringLiteralLValue(const StringLiteral *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
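Load and store pair up to copy one scalar through the natural conversions between its LLVM and memory representations; a sketch assuming CGF, addresses Src and Dst, a QualType Ty, and a SourceLocation Loc in scope:

llvm::Value *V = CGF.EmitLoadOfScalar(Src, /*Volatile=*/false, Ty, Loc);
CGF.EmitStoreOfScalar(V, Dst, /*Volatile=*/false, Ty);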
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:308
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:654
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:627
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2821
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:436
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1632
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T, bool ForBitField=false)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:722
A specialization of Address that requires the address to be an LLVM Constant.
Definition: Address.h:260
static ConstantAddress invalid()
Definition: Address.h:268
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:631
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:243
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:352
LValue - This represents an lvalue reference.
Definition: CGValue.h:181
CharUnits getAlignment() const
Definition: CGValue.h:346
const Qualifiers & getQuals() const
Definition: CGValue.h:341
Address getAddress() const
Definition: CGValue.h:370
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:349
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:361
QualType getType() const
Definition: CGValue.h:294
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:338
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:41
static RValue get(llvm::Value *V)
Definition: CGValue.h:97
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:107
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:70
An abstract representation of an aligned address.
Definition: Address.h:41
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:76
llvm::Value * getPointer() const
Definition: Address.h:65
static RawAddress invalid()
Definition: Address.h:60
bool isValid() const
Definition: Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:75
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:195
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1436
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1716
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1260
ValueDecl * getDecl()
Definition: Expr.h:1328
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
T * getAttr() const
Definition: DeclBase.h:579
bool hasAttrs() const
Definition: DeclBase.h:524
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:501
void addAttr(Attr *A)
Definition: DeclBase.cpp:991
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:565
SourceLocation getLocation() const
Definition: DeclBase.h:445
DeclContext * getDeclContext()
Definition: DeclBase.h:454
AttrVec & getAttrs()
Definition: DeclBase.h:530
bool hasAttr() const
Definition: DeclBase.h:583
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:968
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:822
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:873
The return type of classify().
Definition: Expr.h:330
This represents one expression.
Definition: Expr.h:110
bool isGLValue() const
Definition: Expr.h:280
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:671
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:669
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3064
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3059
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3556
bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
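A sketch of the usual fold-before-emit check, assuming a const Expr *Cond and an ASTContext &Ctx in scope:

bool Folded;
if (Cond->EvaluateAsBooleanCondition(Folded, Ctx)) {
  // Cond folds to a compile-time constant; only the live branch
  // needs to be emitted.
}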
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3911
Represents a member of a struct/union/class.
Definition: Decl.h:3057
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4545
Represents a function declaration or definition.
Definition: Decl.h:1971
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2706
QualType getReturnType() const
Definition: Decl.h:2754
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2683
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3617
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3692
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5379
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:977
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:539
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:535
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3172
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3255
Expr * getBase() const
Definition: Expr.h:3249
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
bool isExternallyVisible() const
Definition: Decl.h:408
This represents clause 'affinity' in the '#pragma omp task'-based directives.
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition: DeclOpenMP.h:349
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition: DeclOpenMP.h:249
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition: DeclOpenMP.h:226
Expr * getCombinerIn()
Get In variable of the combiner.
Definition: DeclOpenMP.h:223
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:220
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition: DeclOpenMP.h:246
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition: DeclOpenMP.h:241
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:266
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
Definition: StmtOpenMP.h:556
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive.
Definition: StmtOpenMP.h:547
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:569
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:502
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind.
Definition: StmtOpenMP.h:496
SourceLocation getEndLoc() const
Returns ending location of directive.
Definition: StmtOpenMP.h:504
static const SpecificClause * getSingleClause(ArrayRef< OMPClause * > Clauses)
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:477
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:459
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:527
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:596
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:151
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition: Expr.cpp:5251
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition: ExprOpenMP.h:275
Decl * getIteratorDecl(unsigned I)
Gets the iterator declaration for the given iterator.
Definition: Expr.cpp:5208
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1018
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:1376
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:1368
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:1360
This represents clause 'map' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:676
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:417
clauselist_range clauselists()
Definition: DeclOpenMP.h:442
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1168
Represents a parameter to a function.
Definition: Decl.h:1761
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3139
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition: Type.h:940
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: Type.h:1167
QualType withRestrict() const
Definition: Type.h:1170
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1007
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:7359
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:7399
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7560
QualType getCanonicalType() const
Definition: Type.h:7411
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: Type.h:1530
Represents a struct/union/class.
Definition: Decl.h:4168
field_iterator field_end() const
Definition: Decl.h:4377
field_range fields() const
Definition: Decl.h:4374
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5081
bool field_empty() const
Definition: Decl.h:4382
field_iterator field_begin() const
Definition: Decl.cpp:5069
RecordDecl * getDecl() const
Definition: Type.h:5559
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:204
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:226
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:3380
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:197
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4737
bool isUnion() const
Definition: Decl.h:3790
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1561
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1472
The base class of the type hierarchy.
Definition: Type.h:1813
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1871
bool isVoidType() const
Definition: Type.h:7905
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration type whose underlying ty...
Definition: Type.cpp:2156
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition: Type.h:8083
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2135
bool isArrayType() const
Definition: Type.h:7678
bool isPointerType() const
Definition: Type.h:7612
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7945
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8193
bool isReferenceType() const
Definition: Type.h:7624
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:695
bool isLValueReferenceType() const
Definition: Type.h:7628
QualType getCanonicalTypeInternal() const
Definition: Type.h:2936
const RecordType * getAsStructureType() const
Definition: Type.cpp:711
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:8076
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2671
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:8179
bool isFloatingType() const
Definition: Type.cpp:2238
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2185
bool isAnyPointerType() const
Definition: Type.h:7616
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8126
bool isRecordType() const
Definition: Type.h:7706
bool isUnionType() const
Definition: Type.cpp:661
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1879
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1875
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
Represents a variable declaration or definition.
Definition: Decl.h:918
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2254
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2363
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1204
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1171
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1282
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2372
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1249
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1345
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3747
Expr * getSizeExpr() const
Definition: Type.h:3766
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:33
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
@ NotKnownNonNull
Definition: Address.h:32
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
if (T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast<Expr *>(T->getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:24
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:269
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:38
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:43
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:39
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:103
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:54
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:58
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:132
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:136
const FunctionProtoType * T
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:131
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:50
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:78
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:79
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:30
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:34
@ AS_public
Definition: Specifiers.h:121
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:70
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:74
#define false
Definition: stdbool.h:26
#define bool
Definition: stdbool.h:24
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
Extra information about a function prototype.
Definition: Type.h:4735
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:111
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition: ExprOpenMP.h:121
Expr * Upper
Normalized upper bound.
Definition: ExprOpenMP.h:116
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:119
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:113
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:179
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:181
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:180
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57