//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
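
  // A minimal sketch of the dispatch this action emits for an untied task
  // with one extra task switching point (block names follow the code above;
  // the IR is illustrative, not authoritative):
  //
  //   switch i32 %part_id, label %.untied.done. [
  //     i32 0, label %.untied.jmp.     ; initial entry into the task body
  //     i32 1, label %.untied.jmp.1    ; resume after the first switch point
  //   ]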
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
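
// Typical usage is a sketch like the following (assuming a CodeGenFunction
// CGF and a RegionCodeGenTy BodyGen are in scope; the names are illustrative,
// not taken from this file):
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, BodyGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     BodyGen(CGF); // emitted with the inlined region info installed
//   } // original CapturedStmtInfo restored here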

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                                  The string is composed of semi-colon separated
///                                  fields which describe the source file,
///                                  the function and a pair of line numbers that
///                                  delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
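
// For reference, the psource strings built in this file have the form
// ";file;function;line;column;;", e.g. ";example.cpp;foo;4;1;;" for a
// hypothetical foo() in example.cpp; when no location is available, a
// default string of the form ";unknown;unknown;0;0;;" is used instead.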

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
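
// For example, '#pragma omp for schedule(dynamic, 4)' selects
// OMP_sch_dynamic_chunked (35); a 'nonmonotonic' modifier is encoded by
// OR-ing in the modifier bit, i.e. 35 | OMP_sch_modifier_nonmonotonic.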

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
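
// For example (hypothetical user code), given
//   #pragma omp declare reduction(myplus : T : omp_out += omp_in) \
//       initializer(omp_priv = T())
// the ReductionOp for a 'reduction(myplus : x)' clause is a CallExpr whose
// callee is an OpaqueValueExpr wrapping a reference to the
// OMPDeclareReductionDecl, which is what getReductionInit above unwraps.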

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
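
// For example, for a reduction item written as 'a[1:n][0:m]' (an array
// section of an array section), the loops above drill through both sections
// and return the declaration of 'a' itself.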

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.EmitStmt(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
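
// For example (hypothetical user code),
//   #pragma omp declare reduction(mymin : int : omp_out = omp_in < omp_out ? omp_in : omp_out)
// produces an internal combiner roughly equivalent to
//   static void .omp_combiner.(int *restrict omp_out, int *restrict omp_in) {
//     *omp_out = *omp_in < *omp_out ? *omp_in : *omp_out;
//   }
// (a sketch; the exact name, parameter order and attributes follow the
// FunctionArgList setup above).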

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
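
// The returned value is a pointer to a private constant ident_t. As a rough
// sketch (assuming flags == OMP_IDENT_KMPC), the emitted global looks like
//   @0 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 2, i32 0, i32 <psource size>, ptr @.str }
// where @.str holds the ";file;function;line;column;;" string built above.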

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
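
// In the non-OpenMPIRBuilder path above, the code emitted at the service
// insertion point is roughly (a sketch):
//   %1 = call i32 @__kmpc_global_thread_num(ptr @0)
// and the result is cached in OpenMPLocThreadIDMap for reuse across the
// function.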

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}
1533
1534static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1535 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1536 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1537
1538 auto FileInfoCallBack = [&]() {
1539 SourceManager &SM = CGM.getContext().getSourceManager();
1540 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1541
1542 llvm::sys::fs::UniqueID ID;
1543 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1544 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1545 }
1546
1547 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1548 };
1549
1550 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1551}
1552
1553 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1554 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1555
1556 auto LinkageForVariable = [&VD, this]() {
1557 return CGM.getLLVMLinkageVarDefinition(VD);
1558 };
1559
1560 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1561
1562 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1563 CGM.getContext().getPointerType(VD->getType()));
1564 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1565 convertCaptureClause(VD), convertDeviceClause(VD),
1566 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1567 VD->isExternallyVisible(),
1568 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1569 VD->getCanonicalDecl()->getBeginLoc()),
1570 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1571 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1572 LinkageForVariable);
1573
1574 if (!addr)
1575 return ConstantAddress::invalid();
1576 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1577}
1578
1579llvm::Constant *
1580 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1581 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1582 !CGM.getContext().getTargetInfo().isTLSSupported());
1583 // Lookup the entry, lazily creating it if necessary.
1584 std::string Suffix = getName({"cache", ""});
1585 return OMPBuilder.getOrCreateInternalVariable(
1586 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1587}
1588
1589 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1590 const VarDecl *VD,
1591 Address VDAddr,
1592 SourceLocation Loc) {
1593 if (CGM.getLangOpts().OpenMPUseTLS &&
1594 CGM.getContext().getTargetInfo().isTLSSupported())
1595 return VDAddr;
1596
1597 llvm::Type *VarTy = VDAddr.getElementType();
1598 llvm::Value *Args[] = {
1599 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1600 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1601 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarTy), CGM.SizeTy, /*isSigned*/ true),
1602 getOrCreateThreadPrivateCache(VD)};
1603 return Address(
1604 CGF.EmitRuntimeCall(
1605 OMPBuilder.getOrCreateRuntimeFunction(
1606 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1607 Args),
1608 CGF.Int8Ty, VDAddr.getAlignment());
1609}
1610
1611 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1612 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1613 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1614 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1615 // library.
1616 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1617 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1618 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1619 OMPLoc);
1620 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1621 // to register constructor/destructor for variable.
1622 llvm::Value *Args[] = {
1623 OMPLoc,
1624 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1625 Ctor, CopyCtor, Dtor};
1626 CGF.EmitRuntimeCall(
1627 OMPBuilder.getOrCreateRuntimeFunction(
1628 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1629 Args);
1630}
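// A minimal sketch, assuming libomp's entry points as declared in kmp.h, of
// the call sequence emitThreadPrivateVarInit produces for a threadprivate
// variable 'x' with a non-trivial constructor and destructor ('loc', 'x_ctor'
// and 'x_dtor' are illustrative names, not from this file):
//
//   __kmpc_global_thread_num(&loc);               // forces runtime init
//   __kmpc_threadprivate_register(&loc, &x,
//                                 x_ctor,         // void *(*)(void *)
//                                 /*cctor=*/NULL, // reserved, must be NULL
//                                 x_dtor);        // void (*)(void *)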
1631
1632 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1633 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1634 bool PerformInit, CodeGenFunction *CGF) {
1635 if (CGM.getLangOpts().OpenMPUseTLS &&
1637 return nullptr;
1638
1639 VD = VD->getDefinition(CGM.getContext());
1640 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1641 QualType ASTTy = VD->getType();
1642
1643 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1644 const Expr *Init = VD->getAnyInitializer();
1645 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1646 // Generate function that re-emits the declaration's initializer into the
1647 // threadprivate copy of the variable VD
1648 CodeGenFunction CtorCGF(CGM);
1649 FunctionArgList Args;
1650 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1651 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1652 ImplicitParamKind::Other);
1653 Args.push_back(&Dst);
1654
1655 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1656 CGM.getContext().VoidPtrTy, Args);
1657 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1658 std::string Name = getName({"__kmpc_global_ctor_", ""});
1659 llvm::Function *Fn =
1660 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1661 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1662 Args, Loc, Loc);
1663 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1664 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1665 CGM.getContext().VoidPtrTy, Dst.getLocation());
1666 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1667 VDAddr.getAlignment());
1668 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1669 /*IsInitializer=*/true);
1670 ArgVal = CtorCGF.EmitLoadOfScalar(
1671 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1672 CGM.getContext().VoidPtrTy, Dst.getLocation());
1673 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1674 CtorCGF.FinishFunction();
1675 Ctor = Fn;
1676 }
1677 if (VD->getType().isDestructedType() != QualType::DK_none) {
1678 // Generate function that emits destructor call for the threadprivate copy
1679 // of the variable VD
1680 CodeGenFunction DtorCGF(CGM);
1681 FunctionArgList Args;
1682 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1683 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1684 ImplicitParamKind::Other);
1685 Args.push_back(&Dst);
1686
1687 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1688 CGM.getContext().VoidTy, Args);
1689 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1690 std::string Name = getName({"__kmpc_global_dtor_", ""});
1691 llvm::Function *Fn =
1692 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1693 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1694 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1695 Loc, Loc);
1696 // Create a scope with an artificial location for the body of this function.
1697 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1698 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1699 DtorCGF.GetAddrOfLocalVar(&Dst),
1700 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1701 DtorCGF.emitDestroy(
1702 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1703 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1704 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1705 DtorCGF.FinishFunction();
1706 Dtor = Fn;
1707 }
1708 // Do not emit init function if it is not required.
1709 if (!Ctor && !Dtor)
1710 return nullptr;
1711
1712 // Copying constructor for the threadprivate variable.
1713 // Must be NULL - reserved by the runtime, which currently requires that
1714 // this parameter always be NULL; otherwise it fires an assertion.
1715 CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1716 if (Ctor == nullptr) {
1717 Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1718 }
1719 if (Dtor == nullptr) {
1720 Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1721 }
1722 if (!CGF) {
1723 auto *InitFunctionTy =
1724 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1725 std::string Name = getName({"__omp_threadprivate_init_", ""});
1726 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1727 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1728 CodeGenFunction InitCGF(CGM);
1729 FunctionArgList ArgList;
1730 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1731 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1732 Loc, Loc);
1733 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1734 InitCGF.FinishFunction();
1735 return InitFunction;
1736 }
1737 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1738 }
1739 return nullptr;
1740}
1741
1742 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1743 llvm::GlobalValue *GV) {
1744 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1745 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1746
1747 // We only need to handle active 'indirect' declare target functions.
1748 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1749 return;
1750
1751 // Get a mangled name to store the new device global in.
1752 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1753 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1754 SmallString<128> Name;
1755 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1756
1757 // We need to generate a new global to hold the address of the indirectly
1758 // called device function. Doing this allows us to keep the visibility and
1759 // linkage of the associated function unchanged while allowing the runtime to
1760 // access its value.
1761 llvm::GlobalValue *Addr = GV;
1762 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1763 Addr = new llvm::GlobalVariable(
1764 CGM.getModule(), CGM.VoidPtrTy,
1765 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1766 nullptr, llvm::GlobalValue::NotThreadLocal,
1767 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1768 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1769 }
1770
1771 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1772 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1773 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1774 llvm::GlobalValue::WeakODRLinkage);
1775}
1776
1777 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1778 QualType VarType,
1779 StringRef Name) {
1780 std::string Suffix = getName({"artificial", ""});
1781 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1782 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1783 VarLVType, Twine(Name).concat(Suffix).str());
1784 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1785 CGM.getContext().getTargetInfo().isTLSSupported()) {
1786 GAddr->setThreadLocal(/*Val=*/true);
1787 return Address(GAddr, GAddr->getValueType(),
1788 CGM.getContext().getTypeAlignInChars(VarType));
1789 }
1790 std::string CacheSuffix = getName({"cache", ""});
1791 llvm::Value *Args[] = {
1792 emitUpdateLocation(CGF, SourceLocation()),
1793 getThreadID(CGF, SourceLocation()),
1794 CGF.Builder.CreatePointerCast(GAddr, CGM.VoidPtrTy),
1795 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1796 /*isSigned=*/false),
1797 OMPBuilder.getOrCreateInternalVariable(
1798 CGM.VoidPtrPtrTy,
1799 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1800 return Address(
1801 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1802 CGF.EmitRuntimeCall(
1803 OMPBuilder.getOrCreateRuntimeFunction(
1804 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1805 Args),
1806 CGF.Builder.getPtrTy(0)),
1807 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1808}
1809
1810 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1811 const RegionCodeGenTy &ThenGen,
1812 const RegionCodeGenTy &ElseGen) {
1813 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1814
1815 // If the condition constant folds and can be elided, try to avoid emitting
1816 // the condition and the dead arm of the if/else.
1817 bool CondConstant;
1818 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1819 if (CondConstant)
1820 ThenGen(CGF);
1821 else
1822 ElseGen(CGF);
1823 return;
1824 }
1825
1826 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1827 // emit the conditional branch.
1828 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1829 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1830 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1831 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1832
1833 // Emit the 'then' code.
1834 CGF.EmitBlock(ThenBlock);
1835 ThenGen(CGF);
1836 CGF.EmitBranch(ContBlock);
1837 // Emit the 'else' code if present.
1838 // There is no need to emit line number for unconditional branch.
1839 (void)ApplyDebugLocation::CreateEmpty(CGF);
1840 CGF.EmitBlock(ElseBlock);
1841 ElseGen(CGF);
1842 // There is no need to emit line number for unconditional branch.
1843 (void)ApplyDebugLocation::CreateEmpty(CGF);
1844 CGF.EmitBranch(ContBlock);
1845 // Emit the continuation block for code after the if.
1846 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1847}
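// For example, '#pragma omp parallel if(0)' constant-folds the condition, so
// only ElseGen (the serialized path) is emitted and no 'omp_if.*' blocks are
// created. With a genuinely dynamic condition the emitted CFG has the shape
// below (block names as created above; a sketch, not verbatim IR):
//
//   br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then:                ; ThenGen(CGF)
//     br label %omp_if.end
//   omp_if.else:                ; ElseGen(CGF)
//     br label %omp_if.end
//   omp_if.end: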
1848
1849 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1850 llvm::Function *OutlinedFn,
1851 ArrayRef<llvm::Value *> CapturedVars,
1852 const Expr *IfCond,
1853 llvm::Value *NumThreads) {
1854 if (!CGF.HaveInsertPoint())
1855 return;
1856 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1857 auto &M = CGM.getModule();
1858 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1859 this](CodeGenFunction &CGF, PrePostActionTy &) {
1860 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1861 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1862 llvm::Value *Args[] = {
1863 RTLoc,
1864 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1865 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1866 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1867 RealArgs.append(std::begin(Args), std::end(Args));
1868 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1869
1870 llvm::FunctionCallee RTLFn =
1871 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1872 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1873 };
1874 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1875 this](CodeGenFunction &CGF, PrePostActionTy &) {
1876 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1877 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1878 // Build calls:
1879 // __kmpc_serialized_parallel(&Loc, GTid);
1880 llvm::Value *Args[] = {RTLoc, ThreadID};
1881 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1882 M, OMPRTL___kmpc_serialized_parallel),
1883 Args);
1884
1885 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1886 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1887 RawAddress ZeroAddrBound =
1888 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1889 /*Name=*/".bound.zero.addr");
1890 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1891 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1892 // ThreadId for serialized parallels is 0.
1893 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1894 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1895 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1896
1897 // Ensure we do not inline the function. This is trivially true for the ones
1898 // passed to __kmpc_fork_call but the ones called in serialized regions
1899 // could be inlined. This is not perfect but it is closer to the invariant
1900 // we want, namely, every data environment starts with a new function.
1901 // TODO: We should pass the if condition to the runtime function and do the
1902 // handling there. Much cleaner code.
1903 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1904 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1905 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1906
1907 // __kmpc_end_serialized_parallel(&Loc, GTid);
1908 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1910 M, OMPRTL___kmpc_end_serialized_parallel),
1911 EndArgs);
1912 };
1913 if (IfCond) {
1914 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1915 } else {
1916 RegionCodeGenTy ThenRCG(ThenGen);
1917 ThenRCG(CGF);
1918 }
1919}
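// A sketch, in pseudo-C against libomp's interface, of the two paths above
// for '#pragma omp parallel if(cond)' capturing 'a' and 'b' ('outlined' and
// the variable names are illustrative):
//
//   if (cond) { // ThenGen: real fork
//     __kmpc_fork_call(&loc, /*argc=*/2, (kmpc_micro)outlined, &a, &b);
//   } else {    // ElseGen: serialized parallel
//     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//     __kmpc_serialized_parallel(&loc, gtid);
//     kmp_int32 zero_bound = 0;
//     outlined(&gtid, &zero_bound, &a, &b);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }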
1920
1921 // If we're inside an (outlined) parallel region, use the region info's
1922 // thread-ID variable (it is passed as the first argument of the outlined
1923 // function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1924 // region but in a regular serial code region, get the thread ID by calling
1925 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in
1926 // a temporary and return the address of that temp.
1927 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1928 SourceLocation Loc) {
1929 if (auto *OMPRegionInfo =
1930 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1931 if (OMPRegionInfo->getThreadIDVariable())
1932 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1933
1934 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1935 QualType Int32Ty =
1936 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1937 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1938 CGF.EmitStoreOfScalar(ThreadID,
1939 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1940
1941 return ThreadIDTemp;
1942}
1943
1944llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1945 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1946 std::string Name = getName({Prefix, "var"});
1947 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1948}
1949
1950namespace {
1951/// Common pre(post)-action for different OpenMP constructs.
1952class CommonActionTy final : public PrePostActionTy {
1953 llvm::FunctionCallee EnterCallee;
1954 ArrayRef<llvm::Value *> EnterArgs;
1955 llvm::FunctionCallee ExitCallee;
1956 ArrayRef<llvm::Value *> ExitArgs;
1957 bool Conditional;
1958 llvm::BasicBlock *ContBlock = nullptr;
1959
1960public:
1961 CommonActionTy(llvm::FunctionCallee EnterCallee,
1962 ArrayRef<llvm::Value *> EnterArgs,
1963 llvm::FunctionCallee ExitCallee,
1964 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1965 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1966 ExitArgs(ExitArgs), Conditional(Conditional) {}
1967 void Enter(CodeGenFunction &CGF) override {
1968 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1969 if (Conditional) {
1970 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1971 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1972 ContBlock = CGF.createBasicBlock("omp_if.end");
1973 // Generate the branch (If-stmt)
1974 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1975 CGF.EmitBlock(ThenBlock);
1976 }
1977 }
1978 void Done(CodeGenFunction &CGF) {
1979 // Emit the rest of blocks/branches
1980 CGF.EmitBranch(ContBlock);
1981 CGF.EmitBlock(ContBlock, true);
1982 }
1983 void Exit(CodeGenFunction &CGF) override {
1984 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1985 }
1986};
1987} // anonymous namespace
1988
1990 StringRef CriticalName,
1991 const RegionCodeGenTy &CriticalOpGen,
1992 SourceLocation Loc, const Expr *Hint) {
1993 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1994 // CriticalOpGen();
1995 // __kmpc_end_critical(ident_t *, gtid, Lock);
1996 // Prepare arguments and build a call to __kmpc_critical
1997 if (!CGF.HaveInsertPoint())
1998 return;
1999 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2000 getCriticalRegionLock(CriticalName)};
2001 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2002 std::end(Args));
2003 if (Hint) {
2004 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2005 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2006 }
2007 CommonActionTy Action(
2008 OMPBuilder.getOrCreateRuntimeFunction(
2009 CGM.getModule(),
2010 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2011 EnterArgs,
2012 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2013 OMPRTL___kmpc_end_critical),
2014 Args);
2015 CriticalOpGen.setAction(Action);
2016 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2017}
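// As a sketch, '#pragma omp critical (name) hint(...)' lowers to the pair
// below, where the lock is the internal 'gomp_critical_user_name.var'
// created by getCriticalRegionLock and the hint value is cast to int32:
//
//   __kmpc_critical_with_hint(&loc, gtid, &gomp_critical_user_name.var, hint);
//   // ... critical region body ...
//   __kmpc_end_critical(&loc, gtid, &gomp_critical_user_name.var);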
2018
2019 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2020 const RegionCodeGenTy &MasterOpGen,
2021 SourceLocation Loc) {
2022 if (!CGF.HaveInsertPoint())
2023 return;
2024 // if(__kmpc_master(ident_t *, gtid)) {
2025 // MasterOpGen();
2026 // __kmpc_end_master(ident_t *, gtid);
2027 // }
2028 // Prepare arguments and build a call to __kmpc_master
2029 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2030 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2031 CGM.getModule(), OMPRTL___kmpc_master),
2032 Args,
2033 OMPBuilder.getOrCreateRuntimeFunction(
2034 CGM.getModule(), OMPRTL___kmpc_end_master),
2035 Args,
2036 /*Conditional=*/true);
2037 MasterOpGen.setAction(Action);
2038 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2039 Action.Done(CGF);
2040}
2041
2042 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2043 const RegionCodeGenTy &MaskedOpGen,
2044 SourceLocation Loc, const Expr *Filter) {
2045 if (!CGF.HaveInsertPoint())
2046 return;
2047 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2048 // MaskedOpGen();
2049 // __kmpc_end_masked(iden_t *, gtid);
2050 // }
2051 // Prepare arguments and build a call to __kmpc_masked
2052 llvm::Value *FilterVal = Filter
2053 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2054 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2055 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2056 FilterVal};
2057 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2058 getThreadID(CGF, Loc)};
2059 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2060 CGM.getModule(), OMPRTL___kmpc_masked),
2061 Args,
2062 OMPBuilder.getOrCreateRuntimeFunction(
2063 CGM.getModule(), OMPRTL___kmpc_end_masked),
2064 ArgsEnd,
2065 /*Conditional=*/true);
2066 MaskedOpGen.setAction(Action);
2067 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2068 Action.Done(CGF);
2069}
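// Because the action is conditional, the body runs only on threads where the
// runtime call returns nonzero. A sketch for '#pragma omp masked filter(3)':
//
//   if (__kmpc_masked(&loc, gtid, /*filter=*/3)) {
//     // ... masked region body ...
//     __kmpc_end_masked(&loc, gtid);
//   }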
2070
2071 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2072 SourceLocation Loc) {
2073 if (!CGF.HaveInsertPoint())
2074 return;
2075 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2076 OMPBuilder.createTaskyield(CGF.Builder);
2077 } else {
2078 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2079 llvm::Value *Args[] = {
2080 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2081 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2082 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2083 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2084 Args);
2085 }
2086
2087 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2088 Region->emitUntiedSwitch(CGF);
2089}
2090
2091 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2092 const RegionCodeGenTy &TaskgroupOpGen,
2093 SourceLocation Loc) {
2094 if (!CGF.HaveInsertPoint())
2095 return;
2096 // __kmpc_taskgroup(ident_t *, gtid);
2097 // TaskgroupOpGen();
2098 // __kmpc_end_taskgroup(ident_t *, gtid);
2099 // Prepare arguments and build a call to __kmpc_taskgroup
2100 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2101 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2102 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2103 Args,
2104 OMPBuilder.getOrCreateRuntimeFunction(
2105 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2106 Args);
2107 TaskgroupOpGen.setAction(Action);
2108 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2109}
2110
2111/// Given an array of pointers to variables, project the address of a
2112/// given variable.
2113 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2114 unsigned Index, const VarDecl *Var) {
2115 // Pull out the pointer to the variable.
2116 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2117 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2118
2119 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2120 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2121}
2122
2123 static llvm::Value *emitCopyprivateCopyFunction(
2124 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2125 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2126 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2127 SourceLocation Loc) {
2128 ASTContext &C = CGM.getContext();
2129 // void copy_func(void *LHSArg, void *RHSArg);
2130 FunctionArgList Args;
2131 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2132 ImplicitParamKind::Other);
2133 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2134 ImplicitParamKind::Other);
2135 Args.push_back(&LHSArg);
2136 Args.push_back(&RHSArg);
2137 const auto &CGFI =
2138 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2139 std::string Name =
2140 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2141 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2142 llvm::GlobalValue::InternalLinkage, Name,
2143 &CGM.getModule());
2144 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2145 Fn->setDoesNotRecurse();
2146 CodeGenFunction CGF(CGM);
2147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2148 // Dest = (void*[n])(LHSArg);
2149 // Src = (void*[n])(RHSArg);
2150 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2151 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2152 CGF.Builder.getPtrTy(0)),
2153 ArgsElemType, CGF.getPointerAlign());
2154 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2155 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2156 CGF.Builder.getPtrTy(0)),
2157 ArgsElemType, CGF.getPointerAlign());
2158 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2159 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2160 // ...
2161 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2162 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2163 const auto *DestVar =
2164 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2165 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2166
2167 const auto *SrcVar =
2168 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2169 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2170
2171 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2172 QualType Type = VD->getType();
2173 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2174 }
2175 CGF.FinishFunction();
2176 return Fn;
2177}
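// A sketch of the helper this generates for two copyprivate variables
// 'int x' and 'double y' (names illustrative; the real function operates on
// the void*[n] pointer arrays built by emitSingleRegion below):
//
//   void omp.copyprivate.copy_func(void *dst, void *src) {
//     void **Dst = (void **)dst, **Src = (void **)src;
//     *(int *)Dst[0]    = *(int *)Src[0];    // x = single thread's x
//     *(double *)Dst[1] = *(double *)Src[1]; // y = single thread's y
//   }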
2178
2179 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2180 const RegionCodeGenTy &SingleOpGen,
2181 SourceLocation Loc,
2182 ArrayRef<const Expr *> CopyprivateVars,
2183 ArrayRef<const Expr *> SrcExprs,
2184 ArrayRef<const Expr *> DstExprs,
2185 ArrayRef<const Expr *> AssignmentOps) {
2186 if (!CGF.HaveInsertPoint())
2187 return;
2188 assert(CopyprivateVars.size() == SrcExprs.size() &&
2189 CopyprivateVars.size() == DstExprs.size() &&
2190 CopyprivateVars.size() == AssignmentOps.size());
2191 ASTContext &C = CGM.getContext();
2192 // int32 did_it = 0;
2193 // if(__kmpc_single(ident_t *, gtid)) {
2194 // SingleOpGen();
2195 // __kmpc_end_single(ident_t *, gtid);
2196 // did_it = 1;
2197 // }
2198 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2199 // <copy_func>, did_it);
2200
2201 Address DidIt = Address::invalid();
2202 if (!CopyprivateVars.empty()) {
2203 // int32 did_it = 0;
2204 QualType KmpInt32Ty =
2205 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2206 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2207 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2208 }
2209 // Prepare arguments and build a call to __kmpc_single
2210 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2211 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2212 CGM.getModule(), OMPRTL___kmpc_single),
2213 Args,
2214 OMPBuilder.getOrCreateRuntimeFunction(
2215 CGM.getModule(), OMPRTL___kmpc_end_single),
2216 Args,
2217 /*Conditional=*/true);
2218 SingleOpGen.setAction(Action);
2219 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2220 if (DidIt.isValid()) {
2221 // did_it = 1;
2222 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2223 }
2224 Action.Done(CGF);
2225 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2226 // <copy_func>, did_it);
2227 if (DidIt.isValid()) {
2228 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2229 QualType CopyprivateArrayTy = C.getConstantArrayType(
2230 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2231 /*IndexTypeQuals=*/0);
2232 // Create a list of all private variables for copyprivate.
2233 Address CopyprivateList =
2234 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2235 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2236 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2237 CGF.Builder.CreateStore(
2238 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2239 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2240 CGF.VoidPtrTy),
2241 Elem);
2242 }
2243 // Build function that copies private values from single region to all other
2244 // threads in the corresponding parallel region.
2245 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2246 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2247 SrcExprs, DstExprs, AssignmentOps, Loc);
2248 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2249 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2250 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2251 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2252 llvm::Value *Args[] = {
2253 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2254 getThreadID(CGF, Loc), // i32 <gtid>
2255 BufSize, // size_t <buf_size>
2256 CL.emitRawPointer(CGF), // void *<copyprivate list>
2257 CpyFn, // void (*) (void *, void *) <copy_func>
2258 DidItVal // i32 did_it
2259 };
2260 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2261 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2262 Args);
2263 }
2264}
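// Putting it together, a sketch of the lowering for
// '#pragma omp single copyprivate(x)':
//
//   kmp_int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     // ... single region body, writes x ...
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   void *list[1] = {&x};
//   __kmpc_copyprivate(&loc, gtid, sizeof(list), list, copy_func, did_it);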
2265
2266 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2267 const RegionCodeGenTy &OrderedOpGen,
2268 SourceLocation Loc, bool IsThreads) {
2269 if (!CGF.HaveInsertPoint())
2270 return;
2271 // __kmpc_ordered(ident_t *, gtid);
2272 // OrderedOpGen();
2273 // __kmpc_end_ordered(ident_t *, gtid);
2274 // Prepare arguments and build a call to __kmpc_ordered
2275 if (IsThreads) {
2276 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2277 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2278 CGM.getModule(), OMPRTL___kmpc_ordered),
2279 Args,
2280 OMPBuilder.getOrCreateRuntimeFunction(
2281 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2282 Args);
2283 OrderedOpGen.setAction(Action);
2284 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2285 return;
2286 }
2287 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2288}
2289
2290 static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2291 unsigned Flags;
2292 if (Kind == OMPD_for)
2293 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2294 else if (Kind == OMPD_sections)
2295 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2296 else if (Kind == OMPD_single)
2297 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2298 else if (Kind == OMPD_barrier)
2299 Flags = OMP_IDENT_BARRIER_EXPL;
2300 else
2301 Flags = OMP_IDENT_BARRIER_IMPL;
2302 return Flags;
2303}
2304
2305 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2306 CodeGenFunction &CGF, const OMPLoopDirective &S,
2307 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2308 // Check if the loop directive is actually a doacross loop directive. In this
2309 // case choose a schedule(static, 1).
2310 if (llvm::any_of(
2311 S.getClausesOfKind<OMPOrderedClause>(),
2312 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2313 ScheduleKind = OMPC_SCHEDULE_static;
2314 // Chunk size is 1 in this case.
2315 llvm::APInt ChunkSize(32, 1);
2316 ChunkExpr = IntegerLiteral::Create(
2317 CGF.getContext(), ChunkSize,
2318 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2319 SourceLocation());
2320 }
2321}
2322
2323 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2324 OpenMPDirectiveKind Kind, bool EmitChecks,
2325 bool ForceSimpleCall) {
2326 // Check if we should use the OMPBuilder
2327 auto *OMPRegionInfo =
2328 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2329 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
2331 OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2332 EmitChecks);
2333 assert(AfterIP && "unexpected error creating barrier");
2334 CGF.Builder.restoreIP(*AfterIP);
2335 return;
2336 }
2337
2338 if (!CGF.HaveInsertPoint())
2339 return;
2340 // Build call __kmpc_cancel_barrier(loc, thread_id);
2341 // Build call __kmpc_barrier(loc, thread_id);
2342 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2343 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2344 // thread_id);
2345 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2346 getThreadID(CGF, Loc)};
2347 if (OMPRegionInfo) {
2348 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2349 llvm::Value *Result = CGF.EmitRuntimeCall(
2350 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2351 OMPRTL___kmpc_cancel_barrier),
2352 Args);
2353 if (EmitChecks) {
2354 // if (__kmpc_cancel_barrier()) {
2355 // exit from construct;
2356 // }
2357 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2358 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2359 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2360 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2361 CGF.EmitBlock(ExitBB);
2362 // exit from construct;
2363 CodeGenFunction::JumpDest CancelDestination =
2364 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2365 CGF.EmitBranchThroughCleanup(CancelDestination);
2366 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2367 }
2368 return;
2369 }
2370 }
2371 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2372 CGM.getModule(), OMPRTL___kmpc_barrier),
2373 Args);
2374}
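// In a cancellable region with EmitChecks set, the emitted check is, sketched
// in pseudo-C:
//
//   if (__kmpc_cancel_barrier(&loc, gtid)) {
//     // cancellation observed: branch through cleanups to the
//     // construct's cancellation destination (.cancel.exit above)
//   }
//
// otherwise a plain __kmpc_barrier(&loc, gtid) is emitted, with the barrier
// kind encoded in the ident_t flags chosen by getDefaultFlagsForBarriers.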
2375
2376 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2377 Expr *ME, bool IsFatal) {
2378 llvm::Value *MVL =
2379 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2380 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2381 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2382 // *message)
2383 llvm::Value *Args[] = {
2384 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2385 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2386 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2387 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2388 CGM.getModule(), OMPRTL___kmpc_error),
2389 Args);
2390}
2391
2392/// Map the OpenMP loop schedule to the runtime enumeration.
2393static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2394 bool Chunked, bool Ordered) {
2395 switch (ScheduleKind) {
2396 case OMPC_SCHEDULE_static:
2397 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2398 : (Ordered ? OMP_ord_static : OMP_sch_static);
2399 case OMPC_SCHEDULE_dynamic:
2400 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2401 case OMPC_SCHEDULE_guided:
2402 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2403 case OMPC_SCHEDULE_runtime:
2404 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2405 case OMPC_SCHEDULE_auto:
2406 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2407 case OMPC_SCHEDULE_unknown:
2408 assert(!Chunked && "chunk was specified but schedule kind not known");
2409 return Ordered ? OMP_ord_static : OMP_sch_static;
2410 }
2411 llvm_unreachable("Unexpected runtime schedule");
2412}
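// Examples of this mapping (chunk presence / orderedness -> OpenMPSchedType):
//   schedule(static)           -> OMP_sch_static
//   schedule(static, c)        -> OMP_sch_static_chunked
//   schedule(dynamic[, c])     -> OMP_sch_dynamic_chunked
//   schedule(guided) + ordered -> OMP_ord_guided_chunked
//   schedule(runtime)          -> OMP_sch_runtime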
2413
2414/// Map the OpenMP distribute schedule to the runtime enumeration.
2415static OpenMPSchedType
2416 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2417 // only static is allowed for dist_schedule
2418 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2419}
2420
2421 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2422 bool Chunked) const {
2423 OpenMPSchedType Schedule =
2424 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2425 return Schedule == OMP_sch_static;
2426}
2427
2428 bool CGOpenMPRuntime::isStaticNonchunked(
2429 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2430 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2431 return Schedule == OMP_dist_sch_static;
2432}
2433
2434 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2435 bool Chunked) const {
2436 OpenMPSchedType Schedule =
2437 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2438 return Schedule == OMP_sch_static_chunked;
2439}
2440
2441 bool CGOpenMPRuntime::isStaticChunked(
2442 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2443 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2444 return Schedule == OMP_dist_sch_static_chunked;
2445}
2446
2447 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2448 OpenMPSchedType Schedule =
2449 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2450 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2451 return Schedule != OMP_sch_static;
2452}
2453
2454static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2455 OpenMPScheduleClauseModifier M1,
2456 OpenMPScheduleClauseModifier M2) {
2457 int Modifier = 0;
2458 switch (M1) {
2459 case OMPC_SCHEDULE_MODIFIER_monotonic:
2460 Modifier = OMP_sch_modifier_monotonic;
2461 break;
2462 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2463 Modifier = OMP_sch_modifier_nonmonotonic;
2464 break;
2465 case OMPC_SCHEDULE_MODIFIER_simd:
2466 if (Schedule == OMP_sch_static_chunked)
2467 Schedule = OMP_sch_static_balanced_chunked;
2468 break;
2469 case OMPC_SCHEDULE_MODIFIER_last:
2470 case OMPC_SCHEDULE_MODIFIER_unknown:
2471 break;
2472 }
2473 switch (M2) {
2474 case OMPC_SCHEDULE_MODIFIER_monotonic:
2475 Modifier = OMP_sch_modifier_monotonic;
2476 break;
2477 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2478 Modifier = OMP_sch_modifier_nonmonotonic;
2479 break;
2480 case OMPC_SCHEDULE_MODIFIER_simd:
2481 if (Schedule == OMP_sch_static_chunked)
2482 Schedule = OMP_sch_static_balanced_chunked;
2483 break;
2484 case OMPC_SCHEDULE_MODIFIER_last:
2485 case OMPC_SCHEDULE_MODIFIER_unknown:
2486 break;
2487 }
2488 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2489 // If the static schedule kind is specified or if the ordered clause is
2490 // specified, and if the nonmonotonic modifier is not specified, the effect is
2491 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2492 // modifier is specified, the effect is as if the nonmonotonic modifier is
2493 // specified.
2494 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2495 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2496 Schedule == OMP_sch_static_balanced_chunked ||
2497 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2498 Schedule == OMP_dist_sch_static_chunked ||
2499 Schedule == OMP_dist_sch_static))
2500 Modifier = OMP_sch_modifier_nonmonotonic;
2501 }
2502 return Schedule | Modifier;
2503}
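// A sketch of the OpenMP 5.0 defaulting implemented above: with no explicit
// modifier and -fopenmp-version=50 or later, a dynamic schedule picks up the
// nonmonotonic bit while static schedules stay unmodified:
//
//   addMonoNonMonoModifier(CGM, OMP_sch_dynamic_chunked,
//                          OMPC_SCHEDULE_MODIFIER_unknown,
//                          OMPC_SCHEDULE_MODIFIER_unknown)
//       == (OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic)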
2504
2505 void CGOpenMPRuntime::emitForDispatchInit(
2506 CodeGenFunction &CGF, SourceLocation Loc,
2507 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2508 bool Ordered, const DispatchRTInput &DispatchValues) {
2509 if (!CGF.HaveInsertPoint())
2510 return;
2511 OpenMPSchedType Schedule = getRuntimeSchedule(
2512 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2513 assert(Ordered ||
2514 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2515 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2516 Schedule != OMP_sch_static_balanced_chunked));
2517 // Call __kmpc_dispatch_init(
2518 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2519 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2520 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2521
2522 // If the Chunk was not specified in the clause - use default value 1.
2523 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2524 : CGF.Builder.getIntN(IVSize, 1);
2525 llvm::Value *Args[] = {
2526 emitUpdateLocation(CGF, Loc),
2527 getThreadID(CGF, Loc),
2528 CGF.Builder.getInt32(addMonoNonMonoModifier(
2529 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2530 DispatchValues.LB, // Lower
2531 DispatchValues.UB, // Upper
2532 CGF.Builder.getIntN(IVSize, 1), // Stride
2533 Chunk // Chunk
2534 };
2535 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2536 Args);
2537}
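// A sketch, against libomp's 32-bit entry point, of the init call this emits
// for '#pragma omp for schedule(dynamic, 4)' over i in [0, N):
//
//   __kmpc_dispatch_init_4(&loc, gtid,
//                          kmp_sch_dynamic_chunked /* | nonmonotonic bit */,
//                          /*lb=*/0, /*ub=*/N - 1, /*st=*/1, /*chunk=*/4);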
2538
2539 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2540 SourceLocation Loc) {
2541 if (!CGF.HaveInsertPoint())
2542 return;
2543 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2544 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2545 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2546}
2547
2548 static void emitForStaticInitCall(
2549 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2550 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2551 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2552 const CGOpenMPRuntime::StaticRTInput &Values) {
2553 if (!CGF.HaveInsertPoint())
2554 return;
2555
2556 assert(!Values.Ordered);
2557 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2558 Schedule == OMP_sch_static_balanced_chunked ||
2559 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2560 Schedule == OMP_dist_sch_static ||
2561 Schedule == OMP_dist_sch_static_chunked);
2562
2563 // Call __kmpc_for_static_init(
2564 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2565 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2566 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2567 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2568 llvm::Value *Chunk = Values.Chunk;
2569 if (Chunk == nullptr) {
2570 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2571 Schedule == OMP_dist_sch_static) &&
2572 "expected static non-chunked schedule");
2573 // If the Chunk was not specified in the clause - use default value 1.
2574 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2575 } else {
2576 assert((Schedule == OMP_sch_static_chunked ||
2577 Schedule == OMP_sch_static_balanced_chunked ||
2578 Schedule == OMP_ord_static_chunked ||
2579 Schedule == OMP_dist_sch_static_chunked) &&
2580 "expected static chunked schedule");
2581 }
2582 llvm::Value *Args[] = {
2583 UpdateLocation,
2584 ThreadId,
2585 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2586 M2)), // Schedule type
2587 Values.IL.emitRawPointer(CGF), // &isLastIter
2588 Values.LB.emitRawPointer(CGF), // &LB
2589 Values.UB.emitRawPointer(CGF), // &UB
2590 Values.ST.emitRawPointer(CGF), // &Stride
2591 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2592 Chunk // Chunk
2593 };
2594 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2595}
2596
2597 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2598 SourceLocation Loc,
2599 OpenMPDirectiveKind DKind,
2600 const OpenMPScheduleTy &ScheduleKind,
2601 const StaticRTInput &Values) {
2602 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2603 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2604 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2605 "Expected loop-based or sections-based directive.");
2606 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2607 isOpenMPLoopDirective(DKind)
2608 ? OMP_IDENT_WORK_LOOP
2609 : OMP_IDENT_WORK_SECTIONS);
2610 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2611 llvm::FunctionCallee StaticInitFunction =
2612 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2613 false);
2614 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2615 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2616 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2617}
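// A sketch of the static-schedule protocol this sets up for
// '#pragma omp for schedule(static)'; the matching fini call is emitted by
// emitForStaticFinish below:
//
//   kmp_int32 last = 0, lb = 0, ub = N - 1, st = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last,
//                            &lb, &ub, &st, /*incr=*/1, /*chunk=*/1);
//   for (kmp_int32 i = lb; i <= ub; ++i) body(i);
//   __kmpc_for_static_fini(&loc, gtid);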
2618
2619 void CGOpenMPRuntime::emitDistributeStaticInit(
2620 CodeGenFunction &CGF, SourceLocation Loc,
2621 OpenMPDistScheduleClauseKind SchedKind,
2622 const CGOpenMPRuntime::StaticRTInput &Values) {
2623 OpenMPSchedType ScheduleNum =
2624 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2625 llvm::Value *UpdatedLocation =
2626 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2627 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2628 llvm::FunctionCallee StaticInitFunction;
2629 bool isGPUDistribute =
2630 CGM.getLangOpts().OpenMPIsTargetDevice &&
2631 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2632 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2633 Values.IVSize, Values.IVSigned, isGPUDistribute);
2634
2635 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2636 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2637 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2638}
2639
2640 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2641 SourceLocation Loc,
2642 OpenMPDirectiveKind DKind) {
2643 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2644 DKind == OMPD_sections) &&
2645 "Expected distribute, for, or sections directive kind");
2646 if (!CGF.HaveInsertPoint())
2647 return;
2648 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2649 llvm::Value *Args[] = {
2650 emitUpdateLocation(CGF, Loc,
2651 isOpenMPDistributeDirective(DKind) ||
2652 (DKind == OMPD_target_teams_loop)
2653 ? OMP_IDENT_WORK_DISTRIBUTE
2654 : isOpenMPLoopDirective(DKind)
2655 ? OMP_IDENT_WORK_LOOP
2656 : OMP_IDENT_WORK_SECTIONS),
2657 getThreadID(CGF, Loc)};
2658 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2659 if (isOpenMPDistributeDirective(DKind) &&
2660 CGM.getLangOpts().OpenMPIsTargetDevice &&
2661 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2662 CGF.EmitRuntimeCall(
2663 OMPBuilder.getOrCreateRuntimeFunction(
2664 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2665 Args);
2666 else
2667 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2668 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2669 Args);
2670}
2671
2672 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2673 SourceLocation Loc,
2674 unsigned IVSize,
2675 bool IVSigned) {
2676 if (!CGF.HaveInsertPoint())
2677 return;
2678 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2679 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2680 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2681 Args);
2682}
2683
2684 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2685 SourceLocation Loc, unsigned IVSize,
2686 bool IVSigned, Address IL,
2687 Address LB, Address UB,
2688 Address ST) {
2689 // Call __kmpc_dispatch_next(
2690 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2691 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2692 // kmp_int[32|64] *p_stride);
2693 llvm::Value *Args[] = {
2694 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2695 IL.emitRawPointer(CGF), // &isLastIter
2696 LB.emitRawPointer(CGF), // &Lower
2697 UB.emitRawPointer(CGF), // &Upper
2698 ST.emitRawPointer(CGF) // &Stride
2699 };
2700 llvm::Value *Call = CGF.EmitRuntimeCall(
2701 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2702 return CGF.EmitScalarConversion(
2703 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2704 CGF.getContext().BoolTy, Loc);
2705}
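// emitForNext is the loop driver for dispatched schedules; a sketch of the
// loop CodeGen builds around it (chunks are contiguous, so the inner loop
// steps by 1):
//
//   kmp_int32 last, lb, ub, st;
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
//     for (kmp_int32 i = lb; i <= ub; ++i) body(i);
//   }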
2706
2707 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2708 llvm::Value *NumThreads,
2709 SourceLocation Loc) {
2710 if (!CGF.HaveInsertPoint())
2711 return;
2712 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2713 llvm::Value *Args[] = {
2714 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2715 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2716 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2717 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2718 Args);
2719}
2720
2721 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2722 ProcBindKind ProcBind,
2723 SourceLocation Loc) {
2724 if (!CGF.HaveInsertPoint())
2725 return;
2726 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2727 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2728 llvm::Value *Args[] = {
2729 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2730 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2731 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2732 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2733 Args);
2734}
2735
2736 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2737 SourceLocation Loc, llvm::AtomicOrdering AO) {
2738 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2739 OMPBuilder.createFlush(CGF.Builder);
2740 } else {
2741 if (!CGF.HaveInsertPoint())
2742 return;
2743 // Build call void __kmpc_flush(ident_t *loc)
2744 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2745 CGM.getModule(), OMPRTL___kmpc_flush),
2746 emitUpdateLocation(CGF, Loc));
2747 }
2748}
2749
2750namespace {
2751/// Indexes of fields for type kmp_task_t.
2752enum KmpTaskTFields {
2753 /// List of shared variables.
2754 KmpTaskTShareds,
2755 /// Task routine.
2756 KmpTaskTRoutine,
2757 /// Partition id for the untied tasks.
2758 KmpTaskTPartId,
2759 /// Function with call of destructors for private variables.
2760 Data1,
2761 /// Task priority.
2762 Data2,
2763 /// (Taskloops only) Lower bound.
2764 KmpTaskTLowerBound,
2765 /// (Taskloops only) Upper bound.
2766 KmpTaskTUpperBound,
2767 /// (Taskloops only) Stride.
2768 KmpTaskTStride,
2769 /// (Taskloops only) Is last iteration flag.
2770 KmpTaskTLastIter,
2771 /// (Taskloops only) Reduction data.
2772 KmpTaskTReductions,
2773};
2774} // anonymous namespace
2775
2776 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2777 // If we are in simd mode or there are no entries, we don't need to do
2778 // anything.
2779 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2780 return;
2781
2782 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2783 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2784 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2785 SourceLocation Loc;
2786 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2787 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2788 E = CGM.getContext().getSourceManager().fileinfo_end();
2789 I != E; ++I) {
2790 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2791 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2792 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2793 I->getFirst(), EntryInfo.Line, 1);
2794 break;
2795 }
2796 }
2797 }
2798 switch (Kind) {
2799 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2800 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2801 DiagnosticsEngine::Error, "Offloading entry for target region in "
2802 "%0 is incorrect: either the "
2803 "address or the ID is invalid.");
2804 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2805 } break;
2806 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2807 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2808 DiagnosticsEngine::Error, "Offloading entry for declare target "
2809 "variable %0 is incorrect: the "
2810 "address is invalid.");
2811 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2812 } break;
2813 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2814 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2815 DiagnosticsEngine::Error,
2816 "Offloading entry for declare target variable is incorrect: the "
2817 "address is invalid.");
2818 CGM.getDiags().Report(DiagID);
2819 } break;
2820 }
2821 };
2822
2823 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2824}
2825
2826 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2827 if (!KmpRoutineEntryPtrTy) {
2828 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2829 ASTContext &C = CGM.getContext();
2830 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2831 FunctionProtoType::ExtProtoInfo EPI;
2832 KmpRoutineEntryPtrQTy = C.getPointerType(
2833 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2834 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2835 }
2836}
2837
2838namespace {
2839struct PrivateHelpersTy {
2840 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2841 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2842 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2843 PrivateElemInit(PrivateElemInit) {}
2844 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2845 const Expr *OriginalRef = nullptr;
2846 const VarDecl *Original = nullptr;
2847 const VarDecl *PrivateCopy = nullptr;
2848 const VarDecl *PrivateElemInit = nullptr;
2849 bool isLocalPrivate() const {
2850 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2851 }
2852};
2853typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2854} // anonymous namespace
2855
2856static bool isAllocatableDecl(const VarDecl *VD) {
2857 const VarDecl *CVD = VD->getCanonicalDecl();
2858 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2859 return false;
2860 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2861 // Use the default allocation.
2862 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2863 !AA->getAllocator());
2864}
2865
2866static RecordDecl *
2867 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2868 if (!Privates.empty()) {
2869 ASTContext &C = CGM.getContext();
2870 // Build struct .kmp_privates_t. {
2871 // /* private vars */
2872 // };
2873 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2874 RD->startDefinition();
2875 for (const auto &Pair : Privates) {
2876 const VarDecl *VD = Pair.second.Original;
2877 QualType Type = VD->getType().getNonReferenceType();
2878 // If the private variable is a local variable with lvalue ref type,
2879 // allocate the pointer instead of the pointee type.
2880 if (Pair.second.isLocalPrivate()) {
2881 if (VD->getType()->isLValueReferenceType())
2882 Type = C.getPointerType(Type);
2883 if (isAllocatableDecl(VD))
2884 Type = C.getPointerType(Type);
2885 }
2886 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2887 if (VD->hasAttrs()) {
2888 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2889 E(VD->getAttrs().end());
2890 I != E; ++I)
2891 FD->addAttr(*I);
2892 }
2893 }
2894 RD->completeDefinition();
2895 return RD;
2896 }
2897 return nullptr;
2898}
2899
2900static RecordDecl *
2901 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2902 QualType KmpInt32Ty,
2903 QualType KmpRoutineEntryPointerQTy) {
2904 ASTContext &C = CGM.getContext();
2905 // Build struct kmp_task_t {
2906 // void * shareds;
2907 // kmp_routine_entry_t routine;
2908 // kmp_int32 part_id;
2909 // kmp_cmplrdata_t data1;
2910 // kmp_cmplrdata_t data2;
2911 // For taskloops additional fields:
2912 // kmp_uint64 lb;
2913 // kmp_uint64 ub;
2914 // kmp_int64 st;
2915 // kmp_int32 liter;
2916 // void * reductions;
2917 // };
2918 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2919 UD->startDefinition();
2920 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2921 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2922 UD->completeDefinition();
2923 QualType KmpCmplrdataTy = C.getRecordType(UD);
2924 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2925 RD->startDefinition();
2926 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2927 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2928 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2929 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2930 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2931 if (isOpenMPTaskLoopDirective(Kind)) {
2932 QualType KmpUInt64Ty =
2933 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2934 QualType KmpInt64Ty =
2935 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2936 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2937 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2938 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2939 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2940 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2941 }
2942 RD->completeDefinition();
2943 return RD;
2944}
2945
2946static RecordDecl *
2947 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2948 ArrayRef<PrivateDataTy> Privates) {
2949 ASTContext &C = CGM.getContext();
2950 // Build struct kmp_task_t_with_privates {
2951 // kmp_task_t task_data;
2952 // .kmp_privates_t. privates;
2953 // };
2954 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2955 RD->startDefinition();
2956 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2957 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2958 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2959 RD->completeDefinition();
2960 return RD;
2961}
2962
2963/// Emit a proxy function which accepts kmp_task_t as the second
2964/// argument.
2965/// \code
2966/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2967/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2968/// For taskloops:
2969/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2970/// tt->reductions, tt->shareds);
2971/// return 0;
2972/// }
2973/// \endcode
2974static llvm::Function *
2975 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2976 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2977 QualType KmpTaskTWithPrivatesPtrQTy,
2978 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2979 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2980 llvm::Value *TaskPrivatesMap) {
2981 ASTContext &C = CGM.getContext();
2982 FunctionArgList Args;
2983 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2984 ImplicitParamKind::Other);
2985 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2986 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2987 ImplicitParamKind::Other);
2988 Args.push_back(&GtidArg);
2989 Args.push_back(&TaskTypeArg);
2990 const auto &TaskEntryFnInfo =
2991 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2992 llvm::FunctionType *TaskEntryTy =
2993 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2994 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
2995 auto *TaskEntry = llvm::Function::Create(
2996 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
2997 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
2998 TaskEntry->setDoesNotRecurse();
2999 CodeGenFunction CGF(CGM);
3000 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3001 Loc, Loc);
3002
3003 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3004 // tt,
3005 // For taskloops:
3006 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3007 // tt->task_data.shareds);
3008 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3009 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3010 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3011 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3012 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3013 const auto *KmpTaskTWithPrivatesQTyRD =
3014 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3015 LValue Base =
3016 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3017 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3018 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3019 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3020 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3021
3022 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3023 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3024 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3025 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3026 CGF.ConvertTypeForMem(SharedsPtrTy));
3027
3028 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3029 llvm::Value *PrivatesParam;
3030 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3031 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3032 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3033 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3034 } else {
3035 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3036 }
3037
3038 llvm::Value *CommonArgs[] = {
3039 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3040 CGF.Builder
3041 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3042 CGF.VoidPtrTy, CGF.Int8Ty)
3043 .emitRawPointer(CGF)};
3044 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3045 std::end(CommonArgs));
3046 if (isOpenMPTaskLoopDirective(Kind)) {
3047 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3048 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3049 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3050 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3051 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3052 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3053 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3054 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3055 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3056 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3057 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3058 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3059 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3060 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3061 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3062 CallArgs.push_back(LBParam);
3063 CallArgs.push_back(UBParam);
3064 CallArgs.push_back(StParam);
3065 CallArgs.push_back(LIParam);
3066 CallArgs.push_back(RParam);
3067 }
3068 CallArgs.push_back(SharedsParam);
3069
3070 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3071 CallArgs);
3072 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3073 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3074 CGF.FinishFunction();
3075 return TaskEntry;
3076}
3077
3078 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3079 SourceLocation Loc,
3080 QualType KmpInt32Ty,
3081 QualType KmpTaskTWithPrivatesPtrQTy,
3082 QualType KmpTaskTWithPrivatesQTy) {
3083 ASTContext &C = CGM.getContext();
3084 FunctionArgList Args;
3085 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3086 ImplicitParamKind::Other);
3087 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3088 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3089 ImplicitParamKind::Other);
3090 Args.push_back(&GtidArg);
3091 Args.push_back(&TaskTypeArg);
3092 const auto &DestructorFnInfo =
3093 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3094 llvm::FunctionType *DestructorFnTy =
3095 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3096 std::string Name =
3097 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3098 auto *DestructorFn =
3099 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3100 Name, &CGM.getModule());
3101 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3102 DestructorFnInfo);
3103 DestructorFn->setDoesNotRecurse();
3104 CodeGenFunction CGF(CGM);
3105 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3106 Args, Loc, Loc);
3107
3108 LValue Base = CGF.EmitLoadOfPointerLValue(
3109 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3110 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3111 const auto *KmpTaskTWithPrivatesQTyRD =
3112 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3113 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3114 Base = CGF.EmitLValueForField(Base, *FI);
3115 for (const auto *Field :
3116 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3117 if (QualType::DestructionKind DtorKind =
3118 Field->getType().isDestructedType()) {
3119 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3120 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3121 }
3122 }
3123 CGF.FinishFunction();
3124 return DestructorFn;
3125}
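// For reference, the generated helper behaves roughly like the following
// pseudo-C (a sketch; 'field1..fieldn' stand for those fields of the
// compiler-synthesized privates record whose types have non-trivial
// destructors):
// \code
// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//   tt->privates.field1.~T1();
//   ...
//   tt->privates.fieldn.~Tn();
// }
// \endcode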
3126
3127/// Emit a privates mapping function for correct handling of private and
3128/// firstprivate variables.
3129/// \code
3130/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3131/// **noalias priv1,..., <tyn> **noalias privn) {
3132/// *priv1 = &.privates.priv1;
3133/// ...;
3134/// *privn = &.privates.privn;
3135/// }
3136/// \endcode
3137static llvm::Value *
3138emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3139 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3140 ArrayRef<PrivateDataTy> Privates) {
3141 ASTContext &C = CGM.getContext();
3142 FunctionArgList Args;
3143 ImplicitParamDecl TaskPrivatesArg(
3144 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3145 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3146 ImplicitParamKind::Other);
3147 Args.push_back(&TaskPrivatesArg);
3148 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3149 unsigned Counter = 1;
3150 for (const Expr *E : Data.PrivateVars) {
3151 Args.push_back(ImplicitParamDecl::Create(
3152 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3153 C.getPointerType(C.getPointerType(E->getType()))
3154 .withConst()
3155 .withRestrict(),
3156 ImplicitParamKind::Other));
3157 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3158 PrivateVarsPos[VD] = Counter;
3159 ++Counter;
3160 }
3161 for (const Expr *E : Data.FirstprivateVars) {
3162 Args.push_back(ImplicitParamDecl::Create(
3163 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3164 C.getPointerType(C.getPointerType(E->getType()))
3165 .withConst()
3166 .withRestrict(),
3167 ImplicitParamKind::Other));
3168 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3169 PrivateVarsPos[VD] = Counter;
3170 ++Counter;
3171 }
3172 for (const Expr *E : Data.LastprivateVars) {
3173 Args.push_back(ImplicitParamDecl::Create(
3174 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3175 C.getPointerType(C.getPointerType(E->getType()))
3176 .withConst()
3177 .withRestrict(),
3178 ImplicitParamKind::Other));
3179 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3180 PrivateVarsPos[VD] = Counter;
3181 ++Counter;
3182 }
3183 for (const VarDecl *VD : Data.PrivateLocals) {
3184 QualType Ty = VD->getType().getNonReferenceType();
3185 if (VD->getType()->isLValueReferenceType())
3186 Ty = C.getPointerType(Ty);
3187 if (isAllocatableDecl(VD))
3188 Ty = C.getPointerType(Ty);
3189 Args.push_back(ImplicitParamDecl::Create(
3190 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3191 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3192 ImplicitParamKind::Other));
3193 PrivateVarsPos[VD] = Counter;
3194 ++Counter;
3195 }
3196 const auto &TaskPrivatesMapFnInfo =
3197 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3198 llvm::FunctionType *TaskPrivatesMapTy =
3199 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3200 std::string Name =
3201 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3202 auto *TaskPrivatesMap = llvm::Function::Create(
3203 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3204 &CGM.getModule());
3205 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3206 TaskPrivatesMapFnInfo);
3207 if (CGM.getLangOpts().Optimize) {
3208 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3209 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3210 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3211 }
3212 CodeGenFunction CGF(CGM);
3213 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3214 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3215
3216 // *privi = &.privates.privi;
3217 LValue Base = CGF.EmitLoadOfPointerLValue(
3218 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3219 TaskPrivatesArg.getType()->castAs<PointerType>());
3220 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3221 Counter = 0;
3222 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3223 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3224 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3225 LValue RefLVal =
3226 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3227 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3228 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3229 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3230 ++Counter;
3231 }
3232 CGF.FinishFunction();
3233 return TaskPrivatesMap;
3234}
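// The returned map function is handed to the proxy task entry as its
// TaskPrivatesMap argument; the proxy calls it to recover the addresses of
// the private copies stored inside the task descriptor before invoking the
// outlined task body.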
3235
3236/// Emit initialization for private variables in task-based directives.
3237static void emitPrivatesInit(CodeGenFunction &CGF,
3238 const OMPExecutableDirective &D,
3239 Address KmpTaskSharedsPtr, LValue TDBase,
3240 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3241 QualType SharedsTy, QualType SharedsPtrTy,
3242 const OMPTaskDataTy &Data,
3243 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3244 ASTContext &C = CGF.getContext();
3245 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3246 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3247 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3248 ? OMPD_taskloop
3249 : OMPD_task;
3250 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3251 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3252 LValue SrcBase;
3253 bool IsTargetTask =
3254 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3255 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3256 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3257 // PointersArray, SizesArray, and MappersArray. The original variables for
3258 // these arrays are not captured and we get their addresses explicitly.
3259 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3260 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3261 SrcBase = CGF.MakeAddrLValue(
3262 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3263 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3264 CGF.ConvertTypeForMem(SharedsTy)),
3265 SharedsTy);
3266 }
3267 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3268 for (const PrivateDataTy &Pair : Privates) {
3269 // Do not initialize private locals.
3270 if (Pair.second.isLocalPrivate()) {
3271 ++FI;
3272 continue;
3273 }
3274 const VarDecl *VD = Pair.second.PrivateCopy;
3275 const Expr *Init = VD->getAnyInitializer();
3276 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3277 !CGF.isTrivialInitializer(Init)))) {
3278 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3279 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3280 const VarDecl *OriginalVD = Pair.second.Original;
3281 // Check if the variable is the target-based BasePointersArray,
3282 // PointersArray, SizesArray, or MappersArray.
3283 LValue SharedRefLValue;
3284 QualType Type = PrivateLValue.getType();
3285 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3286 if (IsTargetTask && !SharedField) {
3287 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3288 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3289 cast<CapturedDecl>(OriginalVD->getDeclContext())
3290 ->getNumParams() == 0 &&
3291 isa<TranslationUnitDecl>(
3292 cast<CapturedDecl>(OriginalVD->getDeclContext())
3293 ->getDeclContext()) &&
3294 "Expected artificial target data variable.");
3295 SharedRefLValue =
3296 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3297 } else if (ForDup) {
3298 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3299 SharedRefLValue = CGF.MakeAddrLValue(
3300 SharedRefLValue.getAddress().withAlignment(
3301 C.getDeclAlign(OriginalVD)),
3302 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3303 SharedRefLValue.getTBAAInfo());
3304 } else if (CGF.LambdaCaptureFields.count(
3305 Pair.second.Original->getCanonicalDecl()) > 0 ||
3306 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3307 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3308 } else {
3309 // Processing for implicitly captured variables.
3310 InlinedOpenMPRegionRAII Region(
3311 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3312 /*HasCancel=*/false, /*NoInheritance=*/true);
3313 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3314 }
3315 if (Type->isArrayType()) {
3316 // Initialize firstprivate array.
3317 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3318 // Perform simple memcpy.
3319 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3320 } else {
3321 // Initialize firstprivate array using element-by-element
3322 // initialization.
3323 CGF.EmitOMPAggregateAssign(
3324 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3325 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3326 Address SrcElement) {
3327 // Clean up any temporaries needed by the initialization.
3328 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3329 InitScope.addPrivate(Elem, SrcElement);
3330 (void)InitScope.Privatize();
3331 // Emit initialization for single element.
3332 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3333 CGF, &CapturesInfo);
3334 CGF.EmitAnyExprToMem(Init, DestElement,
3335 Init->getType().getQualifiers(),
3336 /*IsInitializer=*/false);
3337 });
3338 }
3339 } else {
3340 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3341 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3342 (void)InitScope.Privatize();
3343 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3344 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3345 /*capturedByInit=*/false);
3346 }
3347 } else {
3348 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3349 }
3350 }
3351 ++FI;
3352 }
3353}
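// To summarize the paths above: trivially-initializable firstprivate arrays
// are copied with a single aggregate assignment (a memcpy), arrays of classes
// with non-trivial constructors are initialized element by element, and all
// other privates go through a plain EmitExprAsInit of their initializer.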
3354
3355/// Check if duplication function is required for taskloops.
3356static bool checkInitIsRequired(CodeGenFunction &CGF,
3357 ArrayRef<PrivateDataTy> Privates) {
3358 bool InitRequired = false;
3359 for (const PrivateDataTy &Pair : Privates) {
3360 if (Pair.second.isLocalPrivate())
3361 continue;
3362 const VarDecl *VD = Pair.second.PrivateCopy;
3363 const Expr *Init = VD->getAnyInitializer();
3364 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3365 !CGF.isTrivialInitializer(Init));
3366 if (InitRequired)
3367 break;
3368 }
3369 return InitRequired;
3370}
3371
3372
3373/// Emit task_dup function (for initialization of
3374/// private/firstprivate/lastprivate vars and last_iter flag)
3375/// \code
3376/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3377/// lastpriv) {
3378/// // setup lastprivate flag
3379/// task_dst->last = lastpriv;
3380/// // could be constructor calls here...
3381/// }
3382/// \endcode
3383static llvm::Value *
3384emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3385 const OMPExecutableDirective &D,
3386 QualType KmpTaskTWithPrivatesPtrQTy,
3387 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3388 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3389 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3390 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3391 ASTContext &C = CGM.getContext();
3392 FunctionArgList Args;
3393 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3394 KmpTaskTWithPrivatesPtrQTy,
3395 ImplicitParamKind::Other);
3396 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3397 KmpTaskTWithPrivatesPtrQTy,
3398 ImplicitParamKind::Other);
3399 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3400 ImplicitParamKind::Other);
3401 Args.push_back(&DstArg);
3402 Args.push_back(&SrcArg);
3403 Args.push_back(&LastprivArg);
3404 const auto &TaskDupFnInfo =
3405 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3406 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3407 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3408 auto *TaskDup = llvm::Function::Create(
3409 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3410 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3411 TaskDup->setDoesNotRecurse();
3412 CodeGenFunction CGF(CGM);
3413 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3414 Loc);
3415
3416 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3417 CGF.GetAddrOfLocalVar(&DstArg),
3418 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3419 // task_dst->liter = lastpriv;
3420 if (WithLastIter) {
3421 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3422 LValue Base = CGF.EmitLValueForField(
3423 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3424 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3425 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3426 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3427 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3428 }
3429
3430 // Emit initial values for private copies (if any).
3431 assert(!Privates.empty());
3432 Address KmpTaskSharedsPtr = Address::invalid();
3433 if (!Data.FirstprivateVars.empty()) {
3434 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3435 CGF.GetAddrOfLocalVar(&SrcArg),
3436 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3437 LValue Base = CGF.EmitLValueForField(
3438 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3439 KmpTaskSharedsPtr = Address(
3440 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3441 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3442 KmpTaskTShareds)),
3443 Loc),
3444 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3445 }
3446 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3447 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3448 CGF.FinishFunction();
3449 return TaskDup;
3450}
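// The runtime invokes this task_dup callback whenever it clones the pattern
// task of a taskloop, so every generated child task re-runs the private copy
// initializers (and receives its own last-iteration flag when WithLastIter
// is set).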
3451
3452/// Checks if destructor function is required to be generated.
3453/// \return true if cleanups are required, false otherwise.
3454static bool
3455checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3456 ArrayRef<PrivateDataTy> Privates) {
3457 for (const PrivateDataTy &P : Privates) {
3458 if (P.second.isLocalPrivate())
3459 continue;
3460 QualType Ty = P.second.Original->getType().getNonReferenceType();
3461 if (Ty.isDestructedType())
3462 return true;
3463 }
3464 return false;
3465}
3466
3467namespace {
3468/// Loop generator for OpenMP iterator expression.
3469class OMPIteratorGeneratorScope final
3470 : public CodeGenFunction::OMPPrivateScope {
3471 CodeGenFunction &CGF;
3472 const OMPIteratorExpr *E = nullptr;
3473 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3474 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3475 OMPIteratorGeneratorScope() = delete;
3476 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3477
3478public:
3479 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3480 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3481 if (!E)
3482 return;
3483 SmallVector<llvm::Value *, 4> Uppers;
3484 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3485 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3486 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3487 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3488 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3489 addPrivate(
3490 HelperData.CounterVD,
3491 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3492 }
3493 Privatize();
3494
3495 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3496 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3497 LValue CLVal =
3498 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3499 HelperData.CounterVD->getType());
3500 // Counter = 0;
3501 CGF.EmitStoreOfScalar(
3502 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3503 CLVal);
3504 CodeGenFunction::JumpDest &ContDest =
3505 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3506 CodeGenFunction::JumpDest &ExitDest =
3507 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3508 // N = <number-of-iterations>;
3509 llvm::Value *N = Uppers[I];
3510 // cont:
3511 // if (Counter < N) goto body; else goto exit;
3512 CGF.EmitBlock(ContDest.getBlock());
3513 auto *CVal =
3514 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3515 llvm::Value *Cmp =
3516 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3517 ? CGF.Builder.CreateICmpSLT(CVal, N)
3518 : CGF.Builder.CreateICmpULT(CVal, N);
3519 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3520 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3521 // body:
3522 CGF.EmitBlock(BodyBB);
3523 // Iteri = Begini + Counter * Stepi;
3524 CGF.EmitIgnoredExpr(HelperData.Update);
3525 }
3526 }
3527 ~OMPIteratorGeneratorScope() {
3528 if (!E)
3529 return;
3530 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3531 // Counter = Counter + 1;
3532 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3533 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3534 // goto cont;
3535 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3536 // exit:
3537 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3538 }
3539 }
3540};
3541} // namespace
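// Taken together, the constructor and destructor wrap the enclosed code in
// one loop nest per declared iterator, roughly of the following shape:
// \code
// counter = 0;
// cont:
//   if (counter < upper) goto body; else goto exit;
// body:
//   iter = begin + counter * step;
//   <enclosed code>
//   counter = counter + 1;
//   goto cont;
// exit:
// \endcode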
3542
3543static std::pair<llvm::Value *, llvm::Value *>
3544getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3545 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3546 llvm::Value *Addr;
3547 if (OASE) {
3548 const Expr *Base = OASE->getBase();
3549 Addr = CGF.EmitScalarExpr(Base);
3550 } else {
3551 Addr = CGF.EmitLValue(E).getPointer(CGF);
3552 }
3553 llvm::Value *SizeVal;
3554 QualType Ty = E->getType();
3555 if (OASE) {
3556 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3557 for (const Expr *SE : OASE->getDimensions()) {
3558 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3559 Sz = CGF.EmitScalarConversion(
3560 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3561 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3562 }
3563 } else if (const auto *ASE =
3564 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3565 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3566 Address UpAddrAddress = UpAddrLVal.getAddress();
3567 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3568 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3569 /*Idx0=*/1);
3570 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3571 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3572 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3573 } else {
3574 SizeVal = CGF.getTypeSize(Ty);
3575 }
3576 return std::make_pair(Addr, SizeVal);
3577}
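// For an array section the size is thus computed from addresses rather than
// from the type: one past the section's upper bound minus the section's base
// address, both converted to pointer-width integers.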
3578
3579/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3580static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3581 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3582 if (KmpTaskAffinityInfoTy.isNull()) {
3583 RecordDecl *KmpAffinityInfoRD =
3584 C.buildImplicitRecord("kmp_task_affinity_info_t");
3585 KmpAffinityInfoRD->startDefinition();
3586 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3587 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3588 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3589 KmpAffinityInfoRD->completeDefinition();
3590 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3591 }
3592}
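// The record built above mirrors the runtime's kmp_task_affinity_info_t from
// openmp/runtime/src/kmp.h, roughly (a sketch; the runtime models the last
// member as a 32-bit bitfield):
// \code
// struct kmp_task_affinity_info_t {
//   intptr_t base_addr;
//   size_t len;
//   uint32_t flags;
// };
// \endcode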
3593
3594CGOpenMPRuntime::TaskResultTy
3595CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3596 const OMPExecutableDirective &D,
3597 llvm::Function *TaskFunction, QualType SharedsTy,
3598 Address Shareds, const OMPTaskDataTy &Data) {
3599 ASTContext &C = CGM.getContext();
3600 llvm::SmallVector<PrivateDataTy, 4> Privates;
3601 // Aggregate privates and sort them by alignment.
3602 const auto *I = Data.PrivateCopies.begin();
3603 for (const Expr *E : Data.PrivateVars) {
3604 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3605 Privates.emplace_back(
3606 C.getDeclAlign(VD),
3607 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3608 /*PrivateElemInit=*/nullptr));
3609 ++I;
3610 }
3611 I = Data.FirstprivateCopies.begin();
3612 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3613 for (const Expr *E : Data.FirstprivateVars) {
3614 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3615 Privates.emplace_back(
3616 C.getDeclAlign(VD),
3617 PrivateHelpersTy(
3618 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3619 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3620 ++I;
3621 ++IElemInitRef;
3622 }
3623 I = Data.LastprivateCopies.begin();
3624 for (const Expr *E : Data.LastprivateVars) {
3625 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3626 Privates.emplace_back(
3627 C.getDeclAlign(VD),
3628 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3629 /*PrivateElemInit=*/nullptr));
3630 ++I;
3631 }
3632 for (const VarDecl *VD : Data.PrivateLocals) {
3633 if (isAllocatableDecl(VD))
3634 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3635 else
3636 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3637 }
3638 llvm::stable_sort(Privates,
3639 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3640 return L.first > R.first;
3641 });
3642 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3643 // Build type kmp_routine_entry_t (if not built yet).
3644 emitKmpRoutineEntryT(KmpInt32Ty);
3645 // Build type kmp_task_t (if not built yet).
3646 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3647 if (SavedKmpTaskloopTQTy.isNull()) {
3648 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3649 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3650 }
3651 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3652 } else {
3653 assert((D.getDirectiveKind() == OMPD_task ||
3654 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3655 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3656 "Expected taskloop, task or target directive");
3657 if (SavedKmpTaskTQTy.isNull()) {
3658 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3659 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3660 }
3661 KmpTaskTQTy = SavedKmpTaskTQTy;
3662 }
3663 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3664 // Build particular struct kmp_task_t for the given task.
3665 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3666 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3667 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3668 QualType KmpTaskTWithPrivatesPtrQTy =
3669 C.getPointerType(KmpTaskTWithPrivatesQTy);
3670 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3671 llvm::Value *KmpTaskTWithPrivatesTySize =
3672 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3673 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3674
3675 // Emit initial values for private copies (if any).
3676 llvm::Value *TaskPrivatesMap = nullptr;
3677 llvm::Type *TaskPrivatesMapTy =
3678 std::next(TaskFunction->arg_begin(), 3)->getType();
3679 if (!Privates.empty()) {
3680 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3681 TaskPrivatesMap =
3682 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3683 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3684 TaskPrivatesMap, TaskPrivatesMapTy);
3685 } else {
3686 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3687 cast<llvm::PointerType>(TaskPrivatesMapTy));
3688 }
3689 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3690 // kmp_task_t *tt);
3691 llvm::Function *TaskEntry = emitProxyTaskFunction(
3692 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3693 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3694 TaskPrivatesMap);
3695
3696 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3697 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3698 // kmp_routine_entry_t *task_entry);
3699 // Task flags. Format is taken from
3700 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3701 // description of kmp_tasking_flags struct.
3702 enum {
3703 TiedFlag = 0x1,
3704 FinalFlag = 0x2,
3705 DestructorsFlag = 0x8,
3706 PriorityFlag = 0x20,
3707 DetachableFlag = 0x40,
3708 };
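// For example, a tied task with a priority clause and no destructible
// privates is allocated with flags = TiedFlag | PriorityFlag = 0x21.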
3709 unsigned Flags = Data.Tied ? TiedFlag : 0;
3710 bool NeedsCleanup = false;
3711 if (!Privates.empty()) {
3712 NeedsCleanup =
3713 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3714 if (NeedsCleanup)
3715 Flags = Flags | DestructorsFlag;
3716 }
3717 if (Data.Priority.getInt())
3718 Flags = Flags | PriorityFlag;
3719 if (D.hasClausesOfKind<OMPDetachClause>())
3720 Flags = Flags | DetachableFlag;
3721 llvm::Value *TaskFlags =
3722 Data.Final.getPointer()
3723 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3724 CGF.Builder.getInt32(FinalFlag),
3725 CGF.Builder.getInt32(/*C=*/0))
3726 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3727 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3728 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3729 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3730 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3731 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3732 TaskEntry, KmpRoutineEntryPtrTy)};
3733 llvm::Value *NewTask;
3734 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3735 // Check if we have any device clause associated with the directive.
3736 const Expr *Device = nullptr;
3737 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3738 Device = C->getDevice();
3739 // Emit device ID if any, otherwise use the default value.
3740 llvm::Value *DeviceID;
3741 if (Device)
3742 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3743 CGF.Int64Ty, /*isSigned=*/true);
3744 else
3745 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3746 AllocArgs.push_back(DeviceID);
3747 NewTask = CGF.EmitRuntimeCall(
3748 OMPBuilder.getOrCreateRuntimeFunction(
3749 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3750 AllocArgs);
3751 } else {
3752 NewTask =
3753 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3754 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3755 AllocArgs);
3756 }
3757 // Emit detach clause initialization.
3758 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3759 // task_descriptor);
3760 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3761 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3762 LValue EvtLVal = CGF.EmitLValue(Evt);
3763
3764 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3765 // int gtid, kmp_task_t *task);
3766 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3767 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3768 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3769 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3770 OMPBuilder.getOrCreateRuntimeFunction(
3771 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3772 {Loc, Tid, NewTask});
3773 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3774 Evt->getExprLoc());
3775 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3776 }
3777 // Process affinity clauses.
3778 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3779 // Process list of affinity data.
3780 ASTContext &C = CGM.getContext();
3781 Address AffinitiesArray = Address::invalid();
3782 // Calculate number of elements to form the array of affinity data.
3783 llvm::Value *NumOfElements = nullptr;
3784 unsigned NumAffinities = 0;
3785 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3786 if (const Expr *Modifier = C->getModifier()) {
3787 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3788 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3789 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3790 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3791 NumOfElements =
3792 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3793 }
3794 } else {
3795 NumAffinities += C->varlist_size();
3796 }
3797 }
3798 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3799 // Field ids in kmp_task_affinity_info record.
3800 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3801
3802 QualType KmpTaskAffinityInfoArrayTy;
3803 if (NumOfElements) {
3804 NumOfElements = CGF.Builder.CreateNUWAdd(
3805 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3806 auto *OVE = new (C) OpaqueValueExpr(
3807 Loc,
3808 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3809 VK_PRValue);
3810 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3811 RValue::get(NumOfElements));
3812 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3813 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3814 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3815 // Properly emit variable-sized array.
3816 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3817 ImplicitParamKind::Other);
3818 CGF.EmitVarDecl(*PD);
3819 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3820 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3821 /*isSigned=*/false);
3822 } else {
3823 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3824 KmpTaskAffinityInfoTy,
3825 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3826 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3827 AffinitiesArray =
3828 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3829 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3830 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3831 /*isSigned=*/false);
3832 }
3833
3834 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3835 // Fill the array with elements that have no iterator modifier.
3836 unsigned Pos = 0;
3837 bool HasIterator = false;
3838 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3839 if (C->getModifier()) {
3840 HasIterator = true;
3841 continue;
3842 }
3843 for (const Expr *E : C->varlist()) {
3844 llvm::Value *Addr;
3845 llvm::Value *Size;
3846 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3847 LValue Base =
3848 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3849 KmpTaskAffinityInfoTy);
3850 // affs[i].base_addr = &<Affinities[i].second>;
3851 LValue BaseAddrLVal = CGF.EmitLValueForField(
3852 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3853 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3854 BaseAddrLVal);
3855 // affs[i].len = sizeof(<Affinities[i].second>);
3856 LValue LenLVal = CGF.EmitLValueForField(
3857 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3858 CGF.EmitStoreOfScalar(Size, LenLVal);
3859 ++Pos;
3860 }
3861 }
3862 LValue PosLVal;
3863 if (HasIterator) {
3864 PosLVal = CGF.MakeAddrLValue(
3865 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3866 C.getSizeType());
3867 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3868 }
3869 // Process elements with iterators.
3870 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3871 const Expr *Modifier = C->getModifier();
3872 if (!Modifier)
3873 continue;
3874 OMPIteratorGeneratorScope IteratorScope(
3875 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3876 for (const Expr *E : C->varlist()) {
3877 llvm::Value *Addr;
3878 llvm::Value *Size;
3879 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3880 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3881 LValue Base =
3882 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3883 KmpTaskAffinityInfoTy);
3884 // affs[i].base_addr = &<Affinities[i].second>;
3885 LValue BaseAddrLVal = CGF.EmitLValueForField(
3886 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3887 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3888 BaseAddrLVal);
3889 // affs[i].len = sizeof(<Affinities[i].second>);
3890 LValue LenLVal = CGF.EmitLValueForField(
3891 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3892 CGF.EmitStoreOfScalar(Size, LenLVal);
3893 Idx = CGF.Builder.CreateNUWAdd(
3894 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3895 CGF.EmitStoreOfScalar(Idx, PosLVal);
3896 }
3897 }
3898 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3899 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3900 // naffins, kmp_task_affinity_info_t *affin_list);
3901 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3902 llvm::Value *GTid = getThreadID(CGF, Loc);
3903 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3904 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3905 // FIXME: Emit the call and ignore its result for now, until the runtime
3906 // function is properly implemented.
3907 (void)CGF.EmitRuntimeCall(
3908 OMPBuilder.getOrCreateRuntimeFunction(
3909 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3910 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3911 }
3912 llvm::Value *NewTaskNewTaskTTy =
3913 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3914 NewTask, KmpTaskTWithPrivatesPtrTy);
3915 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3916 KmpTaskTWithPrivatesQTy);
3917 LValue TDBase =
3918 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3919 // Fill the data in the resulting kmp_task_t record.
3920 // Copy shareds if there are any.
3921 Address KmpTaskSharedsPtr = Address::invalid();
3922 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3923 KmpTaskSharedsPtr = Address(
3924 CGF.EmitLoadOfScalar(
3925 CGF.EmitLValueForField(
3926 TDBase,
3927 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3928 Loc),
3929 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3930 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3931 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3932 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3933 }
3934 // Emit initial values for private copies (if any).
3935 TaskResultTy Result;
3936 if (!Privates.empty()) {
3937 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3938 SharedsTy, SharedsPtrTy, Data, Privates,
3939 /*ForDup=*/false);
3940 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3941 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3942 Result.TaskDupFn = emitTaskDupFunction(
3943 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3944 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3945 /*WithLastIter=*/!Data.LastprivateVars.empty());
3946 }
3947 }
3948 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3949 enum { Priority = 0, Destructors = 1 };
3950 // Provide pointer to function with destructors for privates.
3951 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3952 const RecordDecl *KmpCmplrdataUD =
3953 (*FI)->getType()->getAsUnionType()->getDecl();
3954 if (NeedsCleanup) {
3955 llvm::Value *DestructorFn = emitDestructorsFunction(
3956 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3957 KmpTaskTWithPrivatesQTy);
3958 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3959 LValue DestructorsLV = CGF.EmitLValueForField(
3960 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3961 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3962 DestructorFn, KmpRoutineEntryPtrTy),
3963 DestructorsLV);
3964 }
3965 // Set priority.
3966 if (Data.Priority.getInt()) {
3967 LValue Data2LV = CGF.EmitLValueForField(
3968 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3969 LValue PriorityLV = CGF.EmitLValueForField(
3970 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3971 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3972 }
3973 Result.NewTask = NewTask;
3974 Result.TaskEntry = TaskEntry;
3975 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3976 Result.TDBase = TDBase;
3977 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3978 return Result;
3979}
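// At this point the task descriptor is fully set up: the shareds have been
// copied into the buffer referenced by the kmp_task_t, the privates are
// initialized, and the destructor thunk and priority value sit in the two
// kmp_cmplrdata_t unions consumed by the runtime.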
3980
3981/// Translates internal dependency kind into the runtime kind.
3982static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3983 RTLDependenceKindTy DepKind;
3984 switch (K) {
3985 case OMPC_DEPEND_in:
3986 DepKind = RTLDependenceKindTy::DepIn;
3987 break;
3988 // Out and InOut dependencies must use the same code.
3989 case OMPC_DEPEND_out:
3990 case OMPC_DEPEND_inout:
3991 DepKind = RTLDependenceKindTy::DepInOut;
3992 break;
3993 case OMPC_DEPEND_mutexinoutset:
3994 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3995 break;
3996 case OMPC_DEPEND_inoutset:
3997 DepKind = RTLDependenceKindTy::DepInOutSet;
3998 break;
3999 case OMPC_DEPEND_outallmemory:
4000 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4001 break;
4002 case OMPC_DEPEND_source:
4003 case OMPC_DEPEND_sink:
4004 case OMPC_DEPEND_depobj:
4005 case OMPC_DEPEND_inoutallmemory:
4006 case OMPC_DEPEND_unknown:
4007 llvm_unreachable("Unknown task dependence type");
4008 }
4009 return DepKind;
4010}
4011
4012/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4013static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4014 QualType &FlagsTy) {
4015 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4016 if (KmpDependInfoTy.isNull()) {
4017 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4018 KmpDependInfoRD->startDefinition();
4019 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4020 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4021 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4022 KmpDependInfoRD->completeDefinition();
4023 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4024 }
4025}
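// One dependency record therefore looks roughly like this in C (a sketch;
// the runtime's kmp_depend_info in kmp.h models the last member as a
// bitfield):
// \code
// struct kmp_depend_info {
//   intptr_t base_addr;
//   size_t len;
//   unsigned char flags; // bool-sized; holds an RTLDependenceKindTy value
// };
// \endcode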
4026
4027std::pair<llvm::Value *, LValue>
4028CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4029 SourceLocation Loc) {
4030 ASTContext &C = CGM.getContext();
4031 QualType FlagsTy;
4032 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4033 RecordDecl *KmpDependInfoRD =
4034 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4035 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4036 LValue Base = CGF.EmitLoadOfPointerLValue(
4037 DepobjLVal.getAddress().withElementType(
4038 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4039 KmpDependInfoPtrTy->castAs<PointerType>());
4040 Address DepObjAddr = CGF.Builder.CreateGEP(
4041 CGF, Base.getAddress(),
4042 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4043 LValue NumDepsBase = CGF.MakeAddrLValue(
4044 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4045 // NumDeps = deps[i].base_addr;
4046 LValue BaseAddrLVal = CGF.EmitLValueForField(
4047 NumDepsBase,
4048 *std::next(KmpDependInfoRD->field_begin(),
4049 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4050 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4051 return std::make_pair(NumDeps, Base);
4052}
4053
4054static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4055 llvm::PointerUnion<unsigned *, LValue *> Pos,
4056 const OMPTaskDataTy::DependData &Data,
4057 Address DependenciesArray) {
4058 CodeGenModule &CGM = CGF.CGM;
4059 ASTContext &C = CGM.getContext();
4060 QualType FlagsTy;
4061 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4062 RecordDecl *KmpDependInfoRD =
4063 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4064 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4065
4066 OMPIteratorGeneratorScope IteratorScope(
4067 CGF, cast_or_null<OMPIteratorExpr>(
4068 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4069 : nullptr));
4070 for (const Expr *E : Data.DepExprs) {
4071 llvm::Value *Addr;
4072 llvm::Value *Size;
4073
4074 // The expression will be a nullptr in the 'omp_all_memory' case.
4075 if (E) {
4076 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4077 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4078 } else {
4079 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4080 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4081 }
4082 LValue Base;
4083 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4084 Base = CGF.MakeAddrLValue(
4085 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4086 } else {
4087 assert(E && "Expected a non-null expression");
4088 LValue &PosLVal = *cast<LValue *>(Pos);
4089 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4090 Base = CGF.MakeAddrLValue(
4091 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4092 }
4093 // deps[i].base_addr = &<Dependencies[i].second>;
4094 LValue BaseAddrLVal = CGF.EmitLValueForField(
4095 Base,
4096 *std::next(KmpDependInfoRD->field_begin(),
4097 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4098 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4099 // deps[i].len = sizeof(<Dependencies[i].second>);
4100 LValue LenLVal = CGF.EmitLValueForField(
4101 Base, *std::next(KmpDependInfoRD->field_begin(),
4102 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4103 CGF.EmitStoreOfScalar(Size, LenLVal);
4104 // deps[i].flags = <Dependencies[i].first>;
4105 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4106 LValue FlagsLVal = CGF.EmitLValueForField(
4107 Base,
4108 *std::next(KmpDependInfoRD->field_begin(),
4109 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4110 CGF.EmitStoreOfScalar(
4111 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4112 FlagsLVal);
4113 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4114 ++(*P);
4115 } else {
4116 LValue &PosLVal = *cast<LValue *>(Pos);
4117 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4118 Idx = CGF.Builder.CreateNUWAdd(Idx,
4119 llvm::ConstantInt::get(Idx->getType(), 1));
4120 CGF.EmitStoreOfScalar(Idx, PosLVal);
4121 }
4122 }
4123}
4124
4125static SmallVector<llvm::Value *, 4> emitDepobjElementsSizes(
4126 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4127 const OMPTaskDataTy::DependData &Data) {
4128 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4129 "Expected depobj dependency kind.");
4130 SmallVector<llvm::Value *, 4> Sizes;
4131 SmallVector<LValue, 4> SizeLVals;
4132 ASTContext &C = CGF.getContext();
4133 {
4134 OMPIteratorGeneratorScope IteratorScope(
4135 CGF, cast_or_null<OMPIteratorExpr>(
4136 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4137 : nullptr));
4138 for (const Expr *E : Data.DepExprs) {
4139 llvm::Value *NumDeps;
4140 LValue Base;
4141 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4142 std::tie(NumDeps, Base) =
4143 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4144 LValue NumLVal = CGF.MakeAddrLValue(
4145 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4146 C.getUIntPtrType());
4147 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4148 NumLVal.getAddress());
4149 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4150 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4151 CGF.EmitStoreOfScalar(Add, NumLVal);
4152 SizeLVals.push_back(NumLVal);
4153 }
4154 }
4155 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4156 llvm::Value *Size =
4157 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4158 Sizes.push_back(Size);
4159 }
4160 return Sizes;
4161}
4162
4163static void emitDepobjElements(CodeGenFunction &CGF,
4164 QualType &KmpDependInfoTy,
4165 LValue PosLVal,
4166 const OMPTaskDataTy::DependData &Data,
4167 Address DependenciesArray) {
4168 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4169 "Expected depobj dependency kind.");
4170 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4171 {
4172 OMPIteratorGeneratorScope IteratorScope(
4173 CGF, cast_or_null<OMPIteratorExpr>(
4174 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4175 : nullptr));
4176 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4177 const Expr *E = Data.DepExprs[I];
4178 llvm::Value *NumDeps;
4179 LValue Base;
4180 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4181 std::tie(NumDeps, Base) =
4182 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4183
4184 // memcpy dependency data.
4185 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4186 ElSize,
4187 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4188 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4189 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4190 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4191
4192 // Increase pos.
4193 // pos += numDeps;
4194 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4195 CGF.EmitStoreOfScalar(Add, PosLVal);
4196 }
4197 }
4198}
4199
4200std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4201 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4202 SourceLocation Loc) {
4203 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4204 return D.DepExprs.empty();
4205 }))
4206 return std::make_pair(nullptr, Address::invalid());
4207 // Process list of dependencies.
4208 ASTContext &C = CGM.getContext();
4209 Address DependenciesArray = Address::invalid();
4210 llvm::Value *NumOfElements = nullptr;
4211 unsigned NumDependencies = std::accumulate(
4212 Dependencies.begin(), Dependencies.end(), 0,
4213 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4214 return D.DepKind == OMPC_DEPEND_depobj
4215 ? V
4216 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4217 });
4218 QualType FlagsTy;
4219 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4220 bool HasDepobjDeps = false;
4221 bool HasRegularWithIterators = false;
4222 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4223 llvm::Value *NumOfRegularWithIterators =
4224 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4225 // Calculate number of depobj dependencies and regular deps with the
4226 // iterators.
4227 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4228 if (D.DepKind == OMPC_DEPEND_depobj) {
4229 SmallVector<llvm::Value *, 4> Sizes =
4230 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4231 for (llvm::Value *Size : Sizes) {
4232 NumOfDepobjElements =
4233 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4234 }
4235 HasDepobjDeps = true;
4236 continue;
4237 }
4238 // Include number of iterations, if any.
4239
4240 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4241 llvm::Value *ClauseIteratorSpace =
4242 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4243 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4244 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4245 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4246 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4247 }
4248 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4249 ClauseIteratorSpace,
4250 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4251 NumOfRegularWithIterators =
4252 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4253 HasRegularWithIterators = true;
4254 continue;
4255 }
4256 }
4257
4258 QualType KmpDependInfoArrayTy;
4259 if (HasDepobjDeps || HasRegularWithIterators) {
4260 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4261 /*isSigned=*/false);
4262 if (HasDepobjDeps) {
4263 NumOfElements =
4264 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4265 }
4266 if (HasRegularWithIterators) {
4267 NumOfElements =
4268 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4269 }
4270 auto *OVE = new (C) OpaqueValueExpr(
4271 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4272 VK_PRValue);
4273 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4274 RValue::get(NumOfElements));
4275 KmpDependInfoArrayTy =
4276 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4277 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4278 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4279 // Properly emit variable-sized array.
4280 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4281 ImplicitParamKind::Other);
4282 CGF.EmitVarDecl(*PD);
4283 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4284 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4285 /*isSigned=*/false);
4286 } else {
4287 KmpDependInfoArrayTy = C.getConstantArrayType(
4288 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4289 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4290 DependenciesArray =
4291 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4292 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4293 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4294 /*isSigned=*/false);
4295 }
4296 unsigned Pos = 0;
4297 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4298 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4299 Dependencies[I].IteratorExpr)
4300 continue;
4301 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4302 DependenciesArray);
4303 }
4304 // Copy regular dependencies with iterators.
4305 LValue PosLVal = CGF.MakeAddrLValue(
4306 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4307 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4308 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4309 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4310 !Dependencies[I].IteratorExpr)
4311 continue;
4312 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4313 DependenciesArray);
4314 }
4315 // Copy final depobj arrays without iterators.
4316 if (HasDepobjDeps) {
4317 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4318 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4319 continue;
4320 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4321 DependenciesArray);
4322 }
4323 }
4324 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4325 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4326 return std::make_pair(NumOfElements, DependenciesArray);
4327}
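// The dependency array is thus populated in three passes: dependencies
// without iterators first, then iterator-expanded dependencies appended
// through the running position counter, and finally the contents of any
// depobj arrays copied in wholesale.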
4328
4329Address CGOpenMPRuntime::emitDepobjDependClause(
4330 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4331 SourceLocation Loc) {
4332 if (Dependencies.DepExprs.empty())
4333 return Address::invalid();
4334 // Process list of dependencies.
4335 ASTContext &C = CGM.getContext();
4336 Address DependenciesArray = Address::invalid();
4337 unsigned NumDependencies = Dependencies.DepExprs.size();
4338 QualType FlagsTy;
4339 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4340 RecordDecl *KmpDependInfoRD =
4341 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4342
4343 llvm::Value *Size;
4344 // Define type kmp_depend_info[<Dependencies.size()>];
4345 // For depobj reserve one extra element to store the number of elements.
4346 // It is required to handle depobj(x) update(in) construct.
4347 // kmp_depend_info[<Dependencies.size()>] deps;
4348 llvm::Value *NumDepsVal;
4349 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4350 if (const auto *IE =
4351 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4352 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4353 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4354 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4355 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4356 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4357 }
4358 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4359 NumDepsVal);
4360 CharUnits SizeInBytes =
4361 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4362 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4363 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4364 NumDepsVal =
4365 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4366 } else {
4367 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4368 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4369 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4370 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4371 Size = CGM.getSize(Sz.alignTo(Align));
4372 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4373 }
4374 // Need to allocate on the dynamic memory.
4375 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4376 // Use default allocator.
4377 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4378 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4379
4380 llvm::Value *Addr =
4381 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4382 CGM.getModule(), OMPRTL___kmpc_alloc),
4383 Args, ".dep.arr.addr");
4384 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4385 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4386 Addr, CGF.Builder.getPtrTy(0));
4387 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4388 // Write number of elements in the first element of array for depobj.
4389 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4390 // deps[i].base_addr = NumDependencies;
4391 LValue BaseAddrLVal = CGF.EmitLValueForField(
4392 Base,
4393 *std::next(KmpDependInfoRD->field_begin(),
4394 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4395 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4396 llvm::PointerUnion<unsigned *, LValue *> Pos;
4397 unsigned Idx = 1;
4398 LValue PosLVal;
4399 if (Dependencies.IteratorExpr) {
4400 PosLVal = CGF.MakeAddrLValue(
4401 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4402 C.getSizeType());
4403 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4404 /*IsInit=*/true);
4405 Pos = &PosLVal;
4406 } else {
4407 Pos = &Idx;
4408 }
4409 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4410 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4411 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4412 CGF.Int8Ty);
4413 return DependenciesArray;
4414}
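// Element 0 of the allocation acts as a header: its base_addr field holds
// the number of dependencies, while the address stored into the depobj
// variable points at element 1, the first real record. getDepobjElements()
// reads the count back through a GEP of -1, relying on exactly this layout.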
4415
4416void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4417 SourceLocation Loc) {
4418 ASTContext &C = CGM.getContext();
4419 QualType FlagsTy;
4420 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4421 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4422 C.VoidPtrTy.castAs<PointerType>());
4423 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4424 Address Addr = CGF.EmitLoadOfPointer(
4425 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4426 KmpDependInfoPtrTy->castAs<PointerType>());
4427 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4428 Addr.getElementType(), Addr.emitRawPointer(CGF),
4429 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4430 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4431 CGF.VoidPtrTy);
4432 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4433 // Use default allocator.
4434 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4435 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4436
4437 // __kmpc_free(gtid, addr, nullptr);
4438 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4439 CGM.getModule(), OMPRTL___kmpc_free),
4440 Args);
4441}
4442
4443void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4444 OpenMPDependClauseKind NewDepKind,
4445 SourceLocation Loc) {
4446 ASTContext &C = CGM.getContext();
4447 QualType FlagsTy;
4448 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4449 RecordDecl *KmpDependInfoRD =
4450 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4451 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4452 llvm::Value *NumDeps;
4453 LValue Base;
4454 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4455
4456 Address Begin = Base.getAddress();
4457 // Cast from pointer to array type to pointer to single element.
4458 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4459 Begin.emitRawPointer(CGF), NumDeps);
4460 // The basic structure here is a while-do loop.
4461 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4462 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4463 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4464 CGF.EmitBlock(BodyBB);
4465 llvm::PHINode *ElementPHI =
4466 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4467 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4468 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4469 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4470 Base.getTBAAInfo());
4471 // deps[i].flags = NewDepKind;
4472 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4473 LValue FlagsLVal = CGF.EmitLValueForField(
4474 Base, *std::next(KmpDependInfoRD->field_begin(),
4475 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4476 CGF.EmitStoreOfScalar(
4477 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4478 FlagsLVal);
4479
4480 // Shift the address forward by one element.
4481 llvm::Value *ElementNext =
4482 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4483 .emitRawPointer(CGF);
4484 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4485 llvm::Value *IsEmpty =
4486 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4487 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4488 // Done.
4489 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4490}
4491
4492void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4493 const OMPExecutableDirective &D,
4494 llvm::Function *TaskFunction,
4495 QualType SharedsTy, Address Shareds,
4496 const Expr *IfCond,
4497 const OMPTaskDataTy &Data) {
4498 if (!CGF.HaveInsertPoint())
4499 return;
4500
4501 TaskResultTy Result =
4502 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4503 llvm::Value *NewTask = Result.NewTask;
4504 llvm::Function *TaskEntry = Result.TaskEntry;
4505 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4506 LValue TDBase = Result.TDBase;
4507 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4508 // Process list of dependences.
4509 Address DependenciesArray = Address::invalid();
4510 llvm::Value *NumOfElements;
4511 std::tie(NumOfElements, DependenciesArray) =
4512 emitDependClause(CGF, Data.Dependences, Loc);
4513
4514 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4515 // libcall.
4516 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4517 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4518 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4519 // list is not empty
4520 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4521 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4522 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4523 llvm::Value *DepTaskArgs[7];
4524 if (!Data.Dependences.empty()) {
4525 DepTaskArgs[0] = UpLoc;
4526 DepTaskArgs[1] = ThreadID;
4527 DepTaskArgs[2] = NewTask;
4528 DepTaskArgs[3] = NumOfElements;
4529 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4530 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4531 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4532 }
4533 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4534 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4535 if (!Data.Tied) {
4536 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4537 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4538 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4539 }
4540 if (!Data.Dependences.empty()) {
4541 CGF.EmitRuntimeCall(
4542 OMPBuilder.getOrCreateRuntimeFunction(
4543 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4544 DepTaskArgs);
4545 } else {
4546 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4547 CGM.getModule(), OMPRTL___kmpc_omp_task),
4548 TaskArgs);
4549 }
4550 // Check if the parent region is untied and build a return for the untied task.
4551 if (auto *Region =
4552 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4553 Region->emitUntiedSwitch(CGF);
4554 };
4555
4556 llvm::Value *DepWaitTaskArgs[7];
4557 if (!Data.Dependences.empty()) {
4558 DepWaitTaskArgs[0] = UpLoc;
4559 DepWaitTaskArgs[1] = ThreadID;
4560 DepWaitTaskArgs[2] = NumOfElements;
4561 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4562 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4563 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4564 DepWaitTaskArgs[6] =
4565 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4566 }
4567 auto &M = CGM.getModule();
4568 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4569 TaskEntry, &Data, &DepWaitTaskArgs,
4570 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4571 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4572 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4573 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4574 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
4575 // kmp_int32 has_no_wait); if dependence info is specified.
4576 if (!Data.Dependences.empty())
4577 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4578 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4579 DepWaitTaskArgs);
4580 // Call proxy_task_entry(gtid, new_task);
4581 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4582 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4583 Action.Enter(CGF);
4584 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4585 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4586 OutlinedFnArgs);
4587 };
4588
4589 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4590 // kmp_task_t *new_task);
4591 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4592 // kmp_task_t *new_task);
4593 RegionCodeGenTy RCG(CodeGen);
4594 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4595 M, OMPRTL___kmpc_omp_task_begin_if0),
4596 TaskArgs,
4597 OMPBuilder.getOrCreateRuntimeFunction(
4598 M, OMPRTL___kmpc_omp_task_complete_if0),
4599 TaskArgs);
4600 RCG.setAction(Action);
4601 RCG(CGF);
4602 };
4603
4604 if (IfCond) {
4605 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4606 } else {
4607 RegionCodeGenTy ThenRCG(ThenCodeGen);
4608 ThenRCG(CGF);
4609 }
4610}
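// Editorial note: an illustrative mapping for emitTaskCall (a sketch, not
// from this file). For a task such as
//
//   #pragma omp task depend(inout : x) if (cond)
//   { x += 1; }
//
// the "then" branch emits __kmpc_omp_task_with_deps (dependences present) or
// __kmpc_omp_task (none), while the "else" branch waits on the dependences
// via __kmpc_omp_taskwait_deps_51 and runs the task entry inline between
// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.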
4611
4612void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4613 const OMPLoopDirective &D,
4614 llvm::Function *TaskFunction,
4615 QualType SharedsTy, Address Shareds,
4616 const Expr *IfCond,
4617 const OMPTaskDataTy &Data) {
4618 if (!CGF.HaveInsertPoint())
4619 return;
4620 TaskResultTy Result =
4621 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4622 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4623 // libcall.
4624 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4625 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4626 // sched, kmp_uint64 grainsize, void *task_dup);
4627 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4628 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4629 llvm::Value *IfVal;
4630 if (IfCond) {
4631 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4632 /*isSigned=*/true);
4633 } else {
4634 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4635 }
4636
4637 LValue LBLVal = CGF.EmitLValueForField(
4638 Result.TDBase,
4639 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4640 const auto *LBVar =
4641 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4642 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4643 /*IsInitializer=*/true);
4644 LValue UBLVal = CGF.EmitLValueForField(
4645 Result.TDBase,
4646 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4647 const auto *UBVar =
4648 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4649 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4650 /*IsInitializer=*/true);
4651 LValue StLVal = CGF.EmitLValueForField(
4652 Result.TDBase,
4653 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4654 const auto *StVar =
4655 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4656 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4657 /*IsInitializer=*/true);
4658 // Store reductions address.
4659 LValue RedLVal = CGF.EmitLValueForField(
4660 Result.TDBase,
4661 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4662 if (Data.Reductions) {
4663 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4664 } else {
4665 CGF.EmitNullInitialization(RedLVal.getAddress(),
4666 CGF.getContext().VoidPtrTy);
4667 }
4668 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4669 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4670 UpLoc,
4671 ThreadID,
4672 Result.NewTask,
4673 IfVal,
4674 LBLVal.getPointer(CGF),
4675 UBLVal.getPointer(CGF),
4676 CGF.EmitLoadOfScalar(StLVal, Loc),
4677 llvm::ConstantInt::getSigned(
4678 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4679 llvm::ConstantInt::getSigned(
4680 CGF.IntTy, Data.Schedule.getPointer()
4681 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4682 : NoSchedule),
4683 Data.Schedule.getPointer()
4684 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4685 /*isSigned=*/false)
4686 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4687 if (Data.HasModifier)
4688 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4689
4690 TaskArgs.push_back(Result.TaskDupFn
4691 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4692 Result.TaskDupFn, CGF.VoidPtrTy)
4693 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4694 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4695 CGM.getModule(), Data.HasModifier
4696 ? OMPRTL___kmpc_taskloop_5
4697 : OMPRTL___kmpc_taskloop),
4698 TaskArgs);
4699}
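// Editorial note: a sketch of the sched/grainsize encoding above (an
// illustration, not from this file). For
//
//   #pragma omp taskloop grainsize(4)
//
// Data.Schedule holds the expression 4, and its int flag distinguishes
// num_tasks from grainsize, so sched becomes Grainsize (1) and the
// kmp_uint64 argument is 4; with no clause, sched is NoSchedule (0) and the
// argument is 0.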
4700
4701/// Emit a reduction operation for each element of an array (required for
4702/// array sections): LHS op= RHS.
4703/// \param Type Type of array.
4704/// \param LHSVar Variable on the left side of the reduction operation
4705/// (references element of array in original variable).
4706/// \param RHSVar Variable on the right side of the reduction operation
4707/// (references element of array in original variable).
4708/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4709/// RHSVar.
4710 static void EmitOMPAggregateReduction(
4711 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4712 const VarDecl *RHSVar,
4713 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4714 const Expr *, const Expr *)> &RedOpGen,
4715 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4716 const Expr *UpExpr = nullptr) {
4717 // Perform element-by-element initialization.
4718 QualType ElementTy;
4719 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4720 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4721
4722 // Drill down to the base element type on both arrays.
4723 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4724 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4725
4726 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4727 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4728 // Cast from pointer to array type to pointer to single element.
4729 llvm::Value *LHSEnd =
4730 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4731 // The basic structure here is a while-do loop.
4732 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4733 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4734 llvm::Value *IsEmpty =
4735 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4736 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4737
4738 // Enter the loop body, making that address the current address.
4739 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4740 CGF.EmitBlock(BodyBB);
4741
4742 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4743
4744 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4745 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4746 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4747 Address RHSElementCurrent(
4748 RHSElementPHI, RHSAddr.getElementType(),
4749 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4750
4751 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4752 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4753 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4754 Address LHSElementCurrent(
4755 LHSElementPHI, LHSAddr.getElementType(),
4756 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4757
4758 // Emit copy.
4759 CodeGenFunction::OMPPrivateScope Scope(CGF);
4760 Scope.addPrivate(LHSVar, LHSElementCurrent);
4761 Scope.addPrivate(RHSVar, RHSElementCurrent);
4762 Scope.Privatize();
4763 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4764 Scope.ForceCleanup();
4765
4766 // Shift the address forward by one element.
4767 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4768 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4769 "omp.arraycpy.dest.element");
4770 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4771 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4772 "omp.arraycpy.src.element");
4773 // Check whether we've reached the end.
4774 llvm::Value *Done =
4775 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4776 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4777 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4778 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4779
4780 // Done.
4781 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4782}
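// Editorial note: the control flow emitted above is equivalent to this
// C-style sketch (illustrative only), with RedOpGen supplying the body:
//
//   T *lhs = LHSBegin, *rhs = RHSBegin, *end = LHSBegin + NumElements;
//   if (lhs != end) {
//     do {
//       *lhs = RedOp(*lhs, *rhs); // generated by RedOpGen
//       ++lhs; ++rhs;
//     } while (lhs != end);
//   }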
4783
4784/// Emit reduction combiner. If the combiner is a simple expression, emit it as
4785/// is; otherwise treat it as the combiner of a UDR decl and emit it as a call
4786/// to the UDR combiner function.
4787 static void emitReductionCombiner(CodeGenFunction &CGF,
4788 const Expr *ReductionOp) {
4789 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4790 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4791 if (const auto *DRE =
4792 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4793 if (const auto *DRD =
4794 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4795 std::pair<llvm::Function *, llvm::Function *> Reduction =
4796 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4797 RValue Func = RValue::get(Reduction.first);
4798 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4799 CGF.EmitIgnoredExpr(ReductionOp);
4800 return;
4801 }
4802 CGF.EmitIgnoredExpr(ReductionOp);
4803}
4804
4805llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4806 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4807 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4808 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4809 ASTContext &C = CGM.getContext();
4810
4811 // void reduction_func(void *LHSArg, void *RHSArg);
4812 FunctionArgList Args;
4813 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4814 ImplicitParamKind::Other);
4815 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4816 ImplicitParamKind::Other);
4817 Args.push_back(&LHSArg);
4818 Args.push_back(&RHSArg);
4819 const auto &CGFI =
4820 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4821 std::string Name = getReductionFuncName(ReducerName);
4822 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4823 llvm::GlobalValue::InternalLinkage, Name,
4824 &CGM.getModule());
4825 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4826 Fn->setDoesNotRecurse();
4827 CodeGenFunction CGF(CGM);
4828 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4829
4830 // Dst = (void*[n])(LHSArg);
4831 // Src = (void*[n])(RHSArg);
4832 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4833 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4834 CGF.Builder.getPtrTy(0)),
4835 ArgsElemType, CGF.getPointerAlign());
4836 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4837 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4838 CGF.Builder.getPtrTy(0)),
4839 ArgsElemType, CGF.getPointerAlign());
4840
4841 // ...
4842 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4843 // ...
4844 CodeGenFunction::OMPPrivateScope Scope(CGF);
4845 const auto *IPriv = Privates.begin();
4846 unsigned Idx = 0;
4847 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4848 const auto *RHSVar =
4849 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4850 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4851 const auto *LHSVar =
4852 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4853 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4854 QualType PrivTy = (*IPriv)->getType();
4855 if (PrivTy->isVariablyModifiedType()) {
4856 // Get array size and emit VLA type.
4857 ++Idx;
4858 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4859 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4860 const VariableArrayType *VLA =
4861 CGF.getContext().getAsVariableArrayType(PrivTy);
4862 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4863 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4864 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4865 CGF.EmitVariablyModifiedType(PrivTy);
4866 }
4867 }
4868 Scope.Privatize();
4869 IPriv = Privates.begin();
4870 const auto *ILHS = LHSExprs.begin();
4871 const auto *IRHS = RHSExprs.begin();
4872 for (const Expr *E : ReductionOps) {
4873 if ((*IPriv)->getType()->isArrayType()) {
4874 // Emit reduction for array section.
4875 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4876 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4877 EmitOMPAggregateReduction(
4878 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4879 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4880 emitReductionCombiner(CGF, E);
4881 });
4882 } else {
4883 // Emit reduction for array subscript or single variable.
4884 emitReductionCombiner(CGF, E);
4885 }
4886 ++IPriv;
4887 ++ILHS;
4888 ++IRHS;
4889 }
4890 Scope.ForceCleanup();
4891 CGF.FinishFunction();
4892 return Fn;
4893}
4894
4895void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4896 const Expr *ReductionOp,
4897 const Expr *PrivateRef,
4898 const DeclRefExpr *LHS,
4899 const DeclRefExpr *RHS) {
4900 if (PrivateRef->getType()->isArrayType()) {
4901 // Emit reduction for array section.
4902 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4903 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4904 EmitOMPAggregateReduction(
4905 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4906 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4907 emitReductionCombiner(CGF, ReductionOp);
4908 });
4909 } else {
4910 // Emit reduction for array subscript or single variable.
4911 emitReductionCombiner(CGF, ReductionOp);
4912 }
4913}
4914
4915void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4916 ArrayRef<const Expr *> Privates,
4917 ArrayRef<const Expr *> LHSExprs,
4918 ArrayRef<const Expr *> RHSExprs,
4919 ArrayRef<const Expr *> ReductionOps,
4920 ReductionOptionsTy Options) {
4921 if (!CGF.HaveInsertPoint())
4922 return;
4923
4924 bool WithNowait = Options.WithNowait;
4925 bool SimpleReduction = Options.SimpleReduction;
4926
4927 // The following code should be emitted for the reduction:
4928 //
4929 // static kmp_critical_name lock = { 0 };
4930 //
4931 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4932 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4933 // ...
4934 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4935 // *(Type<n>-1*)rhs[<n>-1]);
4936 // }
4937 //
4938 // ...
4939 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4940 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4941 // RedList, reduce_func, &<lock>)) {
4942 // case 1:
4943 // ...
4944 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4945 // ...
4946 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4947 // break;
4948 // case 2:
4949 // ...
4950 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4951 // ...
4952 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4953 // break;
4954 // default:;
4955 // }
4956 //
4957 // If SimpleReduction is true, only the following code is generated:
4958 // ...
4959 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4960 // ...
4961
4962 ASTContext &C = CGM.getContext();
4963
4964 if (SimpleReduction) {
4965 CodeGenFunction::RunCleanupsScope Scope(CGF);
4966 const auto *IPriv = Privates.begin();
4967 const auto *ILHS = LHSExprs.begin();
4968 const auto *IRHS = RHSExprs.begin();
4969 for (const Expr *E : ReductionOps) {
4970 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4971 cast<DeclRefExpr>(*IRHS));
4972 ++IPriv;
4973 ++ILHS;
4974 ++IRHS;
4975 }
4976 return;
4977 }
4978
4979 // 1. Build a list of reduction variables.
4980 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4981 auto Size = RHSExprs.size();
4982 for (const Expr *E : Privates) {
4983 if (E->getType()->isVariablyModifiedType())
4984 // Reserve a slot for the array size.
4985 ++Size;
4986 }
4987 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4988 QualType ReductionArrayTy = C.getConstantArrayType(
4989 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4990 /*IndexTypeQuals=*/0);
4991 RawAddress ReductionList =
4992 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4993 const auto *IPriv = Privates.begin();
4994 unsigned Idx = 0;
4995 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4996 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4997 CGF.Builder.CreateStore(
4998 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4999 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5000 Elem);
5001 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5002 // Store array size.
5003 ++Idx;
5004 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5005 llvm::Value *Size = CGF.Builder.CreateIntCast(
5006 CGF.getVLASize(
5007 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5008 .NumElts,
5009 CGF.SizeTy, /*isSigned=*/false);
5010 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5011 Elem);
5012 }
5013 }
5014
5015 // 2. Emit reduce_func().
5016 llvm::Function *ReductionFn = emitReductionFunction(
5017 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5018 Privates, LHSExprs, RHSExprs, ReductionOps);
5019
5020 // 3. Create static kmp_critical_name lock = { 0 };
5021 std::string Name = getName({"reduction"});
5022 llvm::Value *Lock = getCriticalRegionLock(Name);
5023
5024 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5025 // RedList, reduce_func, &<lock>);
5026 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5027 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5028 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5029 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5030 ReductionList.getPointer(), CGF.VoidPtrTy);
5031 llvm::Value *Args[] = {
5032 IdentTLoc, // ident_t *<loc>
5033 ThreadId, // i32 <gtid>
5034 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5035 ReductionArrayTySize, // size_type sizeof(RedList)
5036 RL, // void *RedList
5037 ReductionFn, // void (*) (void *, void *) <reduce_func>
5038 Lock // kmp_critical_name *&<lock>
5039 };
5040 llvm::Value *Res = CGF.EmitRuntimeCall(
5041 OMPBuilder.getOrCreateRuntimeFunction(
5042 CGM.getModule(),
5043 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5044 Args);
5045
5046 // 5. Build switch(res)
5047 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5048 llvm::SwitchInst *SwInst =
5049 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5050
5051 // 6. Build case 1:
5052 // ...
5053 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5054 // ...
5055 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5056 // break;
5057 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5058 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5059 CGF.EmitBlock(Case1BB);
5060
5061 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5062 llvm::Value *EndArgs[] = {
5063 IdentTLoc, // ident_t *<loc>
5064 ThreadId, // i32 <gtid>
5065 Lock // kmp_critical_name *&<lock>
5066 };
5067 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5068 CodeGenFunction &CGF, PrePostActionTy &Action) {
5069 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5070 const auto *IPriv = Privates.begin();
5071 const auto *ILHS = LHSExprs.begin();
5072 const auto *IRHS = RHSExprs.begin();
5073 for (const Expr *E : ReductionOps) {
5074 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5075 cast<DeclRefExpr>(*IRHS));
5076 ++IPriv;
5077 ++ILHS;
5078 ++IRHS;
5079 }
5080 };
5081 RegionCodeGenTy RCG(CodeGen);
5082 CommonActionTy Action(
5083 nullptr, {},
5084 OMPBuilder.getOrCreateRuntimeFunction(
5085 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5086 : OMPRTL___kmpc_end_reduce),
5087 EndArgs);
5088 RCG.setAction(Action);
5089 RCG(CGF);
5090
5091 CGF.EmitBranch(DefaultBB);
5092
5093 // 7. Build case 2:
5094 // ...
5095 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5096 // ...
5097 // break;
5098 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5099 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5100 CGF.EmitBlock(Case2BB);
5101
5102 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5103 CodeGenFunction &CGF, PrePostActionTy &Action) {
5104 const auto *ILHS = LHSExprs.begin();
5105 const auto *IRHS = RHSExprs.begin();
5106 const auto *IPriv = Privates.begin();
5107 for (const Expr *E : ReductionOps) {
5108 const Expr *XExpr = nullptr;
5109 const Expr *EExpr = nullptr;
5110 const Expr *UpExpr = nullptr;
5111 BinaryOperatorKind BO = BO_Comma;
5112 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5113 if (BO->getOpcode() == BO_Assign) {
5114 XExpr = BO->getLHS();
5115 UpExpr = BO->getRHS();
5116 }
5117 }
5118 // Try to emit update expression as a simple atomic.
5119 const Expr *RHSExpr = UpExpr;
5120 if (RHSExpr) {
5121 // Analyze RHS part of the whole expression.
5122 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5123 RHSExpr->IgnoreParenImpCasts())) {
5124 // If this is a conditional operator, analyze its condition for
5125 // min/max reduction operator.
5126 RHSExpr = ACO->getCond();
5127 }
5128 if (const auto *BORHS =
5129 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5130 EExpr = BORHS->getRHS();
5131 BO = BORHS->getOpcode();
5132 }
5133 }
5134 if (XExpr) {
5135 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5136 auto &&AtomicRedGen = [BO, VD,
5137 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5138 const Expr *EExpr, const Expr *UpExpr) {
5139 LValue X = CGF.EmitLValue(XExpr);
5140 RValue E;
5141 if (EExpr)
5142 E = CGF.EmitAnyExpr(EExpr);
5143 CGF.EmitOMPAtomicSimpleUpdateExpr(
5144 X, E, BO, /*IsXLHSInRHSPart=*/true,
5145 llvm::AtomicOrdering::Monotonic, Loc,
5146 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5147 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5148 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5149 CGF.emitOMPSimpleStore(
5150 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5151 VD->getType().getNonReferenceType(), Loc);
5152 PrivateScope.addPrivate(VD, LHSTemp);
5153 (void)PrivateScope.Privatize();
5154 return CGF.EmitAnyExpr(UpExpr);
5155 });
5156 };
5157 if ((*IPriv)->getType()->isArrayType()) {
5158 // Emit atomic reduction for array section.
5159 const auto *RHSVar =
5160 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5161 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5162 AtomicRedGen, XExpr, EExpr, UpExpr);
5163 } else {
5164 // Emit atomic reduction for array subscript or single variable.
5165 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5166 }
5167 } else {
5168 // Emit as a critical region.
5169 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5170 const Expr *, const Expr *) {
5171 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5172 std::string Name = RT.getName({"atomic_reduction"});
5173 RT.emitCriticalRegion(
5174 CGF, Name,
5175 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5176 Action.Enter(CGF);
5177 emitReductionCombiner(CGF, E);
5178 },
5179 Loc);
5180 };
5181 if ((*IPriv)->getType()->isArrayType()) {
5182 const auto *LHSVar =
5183 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5184 const auto *RHSVar =
5185 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5186 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5187 CritRedGen);
5188 } else {
5189 CritRedGen(CGF, nullptr, nullptr, nullptr);
5190 }
5191 }
5192 ++ILHS;
5193 ++IRHS;
5194 ++IPriv;
5195 }
5196 };
5197 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5198 if (!WithNowait) {
5199 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5200 llvm::Value *EndArgs[] = {
5201 IdentTLoc, // ident_t *<loc>
5202 ThreadId, // i32 <gtid>
5203 Lock // kmp_critical_name *&<lock>
5204 };
5205 CommonActionTy Action(nullptr, {},
5206 OMPBuilder.getOrCreateRuntimeFunction(
5207 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5208 EndArgs);
5209 AtomicRCG.setAction(Action);
5210 AtomicRCG(CGF);
5211 } else {
5212 AtomicRCG(CGF);
5213 }
5214
5215 CGF.EmitBranch(DefaultBB);
5216 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5217}
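// Editorial note: an illustrative source-level trigger (a sketch, not from
// this file). For
//
//   #pragma omp parallel for reduction(+ : sum)
//
// each thread accumulates into a private copy of sum; __kmpc_reduce returns
// 1 when this thread should combine the private copies non-atomically under
// the lock (case 1 above) and 2 when atomic combining is required (case 2).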
5218
5219/// Generates unique name for artificial threadprivate variables.
5220/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5221static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5222 const Expr *Ref) {
5223 SmallString<256> Buffer;
5224 llvm::raw_svector_ostream Out(Buffer);
5225 const clang::DeclRefExpr *DE;
5226 const VarDecl *D = ::getBaseDecl(Ref, DE);
5227 if (!D)
5228 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5229 D = D->getCanonicalDecl();
5230 std::string Name = CGM.getOpenMPRuntime().getName(
5231 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5232 Out << Prefix << Name << "_"
5233 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5234 return std::string(Out.str());
5235}
5236
5237/// Emits reduction initializer function:
5238/// \code
5239/// void @.red_init(void* %arg, void* %orig) {
5240/// %0 = bitcast void* %arg to <type>*
5241/// store <type> <init>, <type>* %0
5242/// ret void
5243/// }
5244/// \endcode
5245static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5246 SourceLocation Loc,
5247 ReductionCodeGen &RCG, unsigned N) {
5248 ASTContext &C = CGM.getContext();
5249 QualType VoidPtrTy = C.VoidPtrTy;
5250 VoidPtrTy.addRestrict();
5251 FunctionArgList Args;
5252 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5253 ImplicitParamKind::Other);
5254 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5255 ImplicitParamKind::Other);
5256 Args.emplace_back(&Param);
5257 Args.emplace_back(&ParamOrig);
5258 const auto &FnInfo =
5259 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5260 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5261 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5262 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5263 Name, &CGM.getModule());
5264 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5265 Fn->setDoesNotRecurse();
5266 CodeGenFunction CGF(CGM);
5267 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5268 QualType PrivateType = RCG.getPrivateType(N);
5269 Address PrivateAddr = CGF.EmitLoadOfPointer(
5270 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5271 C.getPointerType(PrivateType)->castAs<PointerType>());
5272 llvm::Value *Size = nullptr;
5273 // If the size of the reduction item is non-constant, load it from the
5274 // global threadprivate variable.
5275 if (RCG.getSizes(N).second) {
5276 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5277 CGF, CGM.getContext().getSizeType(),
5278 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5279 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5280 CGM.getContext().getSizeType(), Loc);
5281 }
5282 RCG.emitAggregateType(CGF, N, Size);
5283 Address OrigAddr = Address::invalid();
5284 // If the initializer uses the initializer from the declare reduction
5285 // construct, emit a pointer to the address of the original reduction item
5286 // (required by the reduction initializer).
5287 if (RCG.usesReductionInitializer(N)) {
5288 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5289 OrigAddr = CGF.EmitLoadOfPointer(
5290 SharedAddr,
5291 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5292 }
5293 // Emit the initializer:
5294 // %0 = bitcast void* %arg to <type>*
5295 // store <type> <init>, <type>* %0
5296 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5297 [](CodeGenFunction &) { return false; });
5298 CGF.FinishFunction();
5299 return Fn;
5300}
5301
5302/// Emits reduction combiner function:
5303/// \code
5304/// void @.red_comb(void* %arg0, void* %arg1) {
5305/// %lhs = bitcast void* %arg0 to <type>*
5306/// %rhs = bitcast void* %arg1 to <type>*
5307/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5308/// store <type> %2, <type>* %lhs
5309/// ret void
5310/// }
5311/// \endcode
5312static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5313 SourceLocation Loc,
5314 ReductionCodeGen &RCG, unsigned N,
5315 const Expr *ReductionOp,
5316 const Expr *LHS, const Expr *RHS,
5317 const Expr *PrivateRef) {
5318 ASTContext &C = CGM.getContext();
5319 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5320 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5321 FunctionArgList Args;
5322 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5323 C.VoidPtrTy, ImplicitParamKind::Other);
5324 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5325 ImplicitParamKind::Other);
5326 Args.emplace_back(&ParamInOut);
5327 Args.emplace_back(&ParamIn);
5328 const auto &FnInfo =
5329 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5330 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5331 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5332 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5333 Name, &CGM.getModule());
5334 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5335 Fn->setDoesNotRecurse();
5336 CodeGenFunction CGF(CGM);
5337 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5338 llvm::Value *Size = nullptr;
5339 // If the size of the reduction item is non-constant, load it from the
5340 // global threadprivate variable.
5341 if (RCG.getSizes(N).second) {
5342 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5343 CGF, CGM.getContext().getSizeType(),
5344 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5345 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5346 CGM.getContext().getSizeType(), Loc);
5347 }
5348 RCG.emitAggregateType(CGF, N, Size);
5349 // Remap lhs and rhs variables to the addresses of the function arguments.
5350 // %lhs = bitcast void* %arg0 to <type>*
5351 // %rhs = bitcast void* %arg1 to <type>*
5352 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5353 PrivateScope.addPrivate(
5354 LHSVD,
5355 // Pull out the pointer to the variable.
5356 CGF.EmitLoadOfPointer(
5357 CGF.GetAddrOfLocalVar(&ParamInOut)
5358 .withElementType(CGF.Builder.getPtrTy(0)),
5359 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5360 PrivateScope.addPrivate(
5361 RHSVD,
5362 // Pull out the pointer to the variable.
5363 CGF.EmitLoadOfPointer(
5364 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5365 CGF.Builder.getPtrTy(0)),
5366 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5367 PrivateScope.Privatize();
5368 // Emit the combiner body:
5369 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5370 // store <type> %2, <type>* %lhs
5371 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5372 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5373 cast<DeclRefExpr>(RHS));
5374 CGF.FinishFunction();
5375 return Fn;
5376}
5377
5378/// Emits reduction finalizer function:
5379/// \code
5380/// void @.red_fini(void* %arg) {
5381/// %0 = bitcast void* %arg to <type>*
5382/// <destroy>(<type>* %0)
5383/// ret void
5384/// }
5385/// \endcode
5386static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5387 SourceLocation Loc,
5388 ReductionCodeGen &RCG, unsigned N) {
5389 if (!RCG.needCleanups(N))
5390 return nullptr;
5391 ASTContext &C = CGM.getContext();
5392 FunctionArgList Args;
5393 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5394 ImplicitParamKind::Other);
5395 Args.emplace_back(&Param);
5396 const auto &FnInfo =
5397 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5398 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5399 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5400 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5401 Name, &CGM.getModule());
5402 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5403 Fn->setDoesNotRecurse();
5404 CodeGenFunction CGF(CGM);
5405 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5406 Address PrivateAddr = CGF.EmitLoadOfPointer(
5407 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5408 llvm::Value *Size = nullptr;
5409 // If the size of the reduction item is non-constant, load it from the
5410 // global threadprivate variable.
5411 if (RCG.getSizes(N).second) {
5412 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5413 CGF, CGM.getContext().getSizeType(),
5414 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5415 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5416 CGM.getContext().getSizeType(), Loc);
5417 }
5418 RCG.emitAggregateType(CGF, N, Size);
5419 // Emit the finalizer body:
5420 // <destroy>(<type>* %0)
5421 RCG.emitCleanups(CGF, N, PrivateAddr);
5422 CGF.FinishFunction(Loc);
5423 return Fn;
5424}
5425
5426llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5427 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5428 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5429 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5430 return nullptr;
5431
5432 // Build typedef struct:
5433 // kmp_taskred_input {
5434 // void *reduce_shar; // shared reduction item
5435 // void *reduce_orig; // original reduction item used for initialization
5436 // size_t reduce_size; // size of data item
5437 // void *reduce_init; // data initialization routine
5438 // void *reduce_fini; // data finalization routine
5439 // void *reduce_comb; // data combiner routine
5440 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5441 // } kmp_taskred_input_t;
5442 ASTContext &C = CGM.getContext();
5443 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5444 RD->startDefinition();
5445 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5446 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5447 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5448 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5449 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5450 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5451 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5452 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5453 RD->completeDefinition();
5454 QualType RDType = C.getRecordType(RD);
5455 unsigned Size = Data.ReductionVars.size();
5456 llvm::APInt ArraySize(/*numBits=*/64, Size);
5457 QualType ArrayRDType =
5458 C.getConstantArrayType(RDType, ArraySize, nullptr,
5459 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5460 // kmp_task_red_input_t .rd_input.[Size];
5461 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5462 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5463 Data.ReductionCopies, Data.ReductionOps);
5464 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5465 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5466 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5467 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5468 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5469 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5470 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5471 ".rd_input.gep.");
5472 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5473 // ElemLVal.reduce_shar = &Shareds[Cnt];
5474 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5475 RCG.emitSharedOrigLValue(CGF, Cnt);
5476 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5477 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5478 // ElemLVal.reduce_orig = &Origs[Cnt];
5479 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5480 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5481 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5482 RCG.emitAggregateType(CGF, Cnt);
5483 llvm::Value *SizeValInChars;
5484 llvm::Value *SizeVal;
5485 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5486 // We use delayed creation/initialization for VLAs and array sections. It is
5487 // required because the runtime does not provide a way to pass the sizes of
5488 // VLAs/array sections to the initializer/combiner/finalizer functions.
5489 // Instead, threadprivate global variables are used to store these values
5490 // and read them back in those functions.
5491 bool DelayedCreation = !!SizeVal;
5492 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5493 /*isSigned=*/false);
5494 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5495 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5496 // ElemLVal.reduce_init = init;
5497 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5498 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5499 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5500 // ElemLVal.reduce_fini = fini;
5501 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5502 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5503 llvm::Value *FiniAddr =
5504 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5505 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5506 // ElemLVal.reduce_comb = comb;
5507 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5508 llvm::Value *CombAddr = emitReduceCombFunction(
5509 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5510 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5511 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5512 // ElemLVal.flags = 0;
5513 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5514 if (DelayedCreation) {
5515 CGF.EmitStoreOfScalar(
5516 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5517 FlagsLVal);
5518 } else
5519 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5520 }
5521 if (Data.IsReductionWithTaskMod) {
5522 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5523 // is_ws, int num, void *data);
5524 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5525 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5526 CGM.IntTy, /*isSigned=*/true);
5527 llvm::Value *Args[] = {
5528 IdentTLoc, GTid,
5529 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5530 /*isSigned=*/true),
5531 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5532 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5533 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5534 return CGF.EmitRuntimeCall(
5535 OMPBuilder.getOrCreateRuntimeFunction(
5536 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5537 Args);
5538 }
5539 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5540 llvm::Value *Args[] = {
5541 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5542 /*isSigned=*/true),
5543 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5544 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5545 CGM.VoidPtrTy)};
5546 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5547 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5548 Args);
5549}
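// Editorial note: a sketch of the construct served by emitTaskReductionInit
// (illustrative, not from this file). For
//
//   #pragma omp taskgroup task_reduction(+ : x)
//
// one kmp_taskred_input_t entry is filled in for x, carrying its reduce_init,
// reduce_fini and reduce_comb helpers, and the whole array is passed to
// __kmpc_taskred_init (or to __kmpc_taskred_modifier_init when a reduction
// modifier is present).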
5550
5551void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5552 SourceLocation Loc,
5553 bool IsWorksharingReduction) {
5554 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5555 // is_ws, int num, void *data);
5556 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5557 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5558 CGM.IntTy, /*isSigned=*/true);
5559 llvm::Value *Args[] = {IdentTLoc, GTid,
5560 llvm::ConstantInt::get(CGM.IntTy,
5561 IsWorksharingReduction ? 1 : 0,
5562 /*isSigned=*/true)};
5563 (void)CGF.EmitRuntimeCall(
5564 OMPBuilder.getOrCreateRuntimeFunction(
5565 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5566 Args);
5567}
5568
5569void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5570 SourceLocation Loc,
5571 ReductionCodeGen &RCG,
5572 unsigned N) {
5573 auto Sizes = RCG.getSizes(N);
5574 // Emit the threadprivate global variable if the size is non-constant
5575 // (Sizes.second != nullptr).
5576 if (Sizes.second) {
5577 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5578 /*isSigned=*/false);
5579 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5580 CGF, CGM.getContext().getSizeType(),
5581 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5582 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5583 }
5584}
5585
5586Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5587 SourceLocation Loc,
5588 llvm::Value *ReductionsPtr,
5589 LValue SharedLVal) {
5590 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5591 // *d);
5592 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5593 CGM.IntTy,
5594 /*isSigned=*/true),
5595 ReductionsPtr,
5596 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5597 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5598 return Address(
5599 CGF.EmitRuntimeCall(
5600 OMPBuilder.getOrCreateRuntimeFunction(
5601 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5602 Args),
5603 CGF.Int8Ty, SharedLVal.getAlignment());
5604}
5605
5606void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5607 const OMPTaskDataTy &Data) {
5608 if (!CGF.HaveInsertPoint())
5609 return;
5610
5611 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5612 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5613 OMPBuilder.createTaskwait(CGF.Builder);
5614 } else {
5615 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5616 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5617 auto &M = CGM.getModule();
5618 Address DependenciesArray = Address::invalid();
5619 llvm::Value *NumOfElements;
5620 std::tie(NumOfElements, DependenciesArray) =
5621 emitDependClause(CGF, Data.Dependences, Loc);
5622 if (!Data.Dependences.empty()) {
5623 llvm::Value *DepWaitTaskArgs[7];
5624 DepWaitTaskArgs[0] = UpLoc;
5625 DepWaitTaskArgs[1] = ThreadID;
5626 DepWaitTaskArgs[2] = NumOfElements;
5627 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5628 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5629 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5630 DepWaitTaskArgs[6] =
5631 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5632
5633 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5634
5635 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5636 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5637 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5638 // kmp_int32 has_no_wait); if dependence info is specified.
5639 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5640 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5641 DepWaitTaskArgs);
5642
5643 } else {
5644
5645 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5646 // global_tid);
5647 llvm::Value *Args[] = {UpLoc, ThreadID};
5648 // Ignore return result until untied tasks are supported.
5649 CGF.EmitRuntimeCall(
5650 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5651 Args);
5652 }
5653 }
5654
5655 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5656 Region->emitUntiedSwitch(CGF);
5657}
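// Editorial note: an illustrative mapping (not from this file). A bare
//
//   #pragma omp taskwait
//
// lowers to __kmpc_omp_taskwait (or the OpenMPIRBuilder equivalent), while
//
//   #pragma omp taskwait depend(in : x)
//
// lowers to __kmpc_omp_taskwait_deps_51 with the emitted dependence array.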
5658
5659void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5660 OpenMPDirectiveKind InnerKind,
5661 const RegionCodeGenTy &CodeGen,
5662 bool HasCancel) {
5663 if (!CGF.HaveInsertPoint())
5664 return;
5665 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5666 InnerKind != OMPD_critical &&
5667 InnerKind != OMPD_master &&
5668 InnerKind != OMPD_masked);
5669 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5670}
5671
5672namespace {
5673enum RTCancelKind {
5674 CancelNoreq = 0,
5675 CancelParallel = 1,
5676 CancelLoop = 2,
5677 CancelSections = 3,
5678 CancelTaskgroup = 4
5679};
5680} // anonymous namespace
5681
5682static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5683 RTCancelKind CancelKind = CancelNoreq;
5684 if (CancelRegion == OMPD_parallel)
5685 CancelKind = CancelParallel;
5686 else if (CancelRegion == OMPD_for)
5687 CancelKind = CancelLoop;
5688 else if (CancelRegion == OMPD_sections)
5689 CancelKind = CancelSections;
5690 else {
5691 assert(CancelRegion == OMPD_taskgroup);
5692 CancelKind = CancelTaskgroup;
5693 }
5694 return CancelKind;
5695}
5696
5697void CGOpenMPRuntime::emitCancellationPointCall(
5698 CodeGenFunction &CGF, SourceLocation Loc,
5699 OpenMPDirectiveKind CancelRegion) {
5700 if (!CGF.HaveInsertPoint())
5701 return;
5702 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5703 // global_tid, kmp_int32 cncl_kind);
5704 if (auto *OMPRegionInfo =
5705 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5706 // For 'cancellation point taskgroup', the task region info may not have a
5707 // cancel. This may instead happen in another adjacent task.
5708 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5709 llvm::Value *Args[] = {
5710 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5711 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5712 // Ignore return result until untied tasks are supported.
5713 llvm::Value *Result = CGF.EmitRuntimeCall(
5714 OMPBuilder.getOrCreateRuntimeFunction(
5715 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5716 Args);
5717 // if (__kmpc_cancellationpoint()) {
5718 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5719 // exit from construct;
5720 // }
5721 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5722 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5723 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5724 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5725 CGF.EmitBlock(ExitBB);
5726 if (CancelRegion == OMPD_parallel)
5727 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5728 // exit from construct;
5729 CodeGenFunction::JumpDest CancelDest =
5730 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5731 CGF.EmitBranchThroughCleanup(CancelDest);
5732 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5733 }
5734 }
5735}
5736
5737void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5738 const Expr *IfCond,
5739 OpenMPDirectiveKind CancelRegion) {
5740 if (!CGF.HaveInsertPoint())
5741 return;
5742 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5743 // kmp_int32 cncl_kind);
5744 auto &M = CGM.getModule();
5745 if (auto *OMPRegionInfo =
5746 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5747 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5748 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5750 llvm::Value *Args[] = {
5751 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5752 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5753 // Ignore return result until untied tasks are supported.
5754 llvm::Value *Result = CGF.EmitRuntimeCall(
5755 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5756 // if (__kmpc_cancel()) {
5757 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5758 // exit from construct;
5759 // }
5760 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5761 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5762 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5763 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5764 CGF.EmitBlock(ExitBB);
5765 if (CancelRegion == OMPD_parallel)
5766 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5767 // exit from construct;
5768 CodeGenFunction::JumpDest CancelDest =
5769 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5770 CGF.EmitBranchThroughCleanup(CancelDest);
5771 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5772 };
5773 if (IfCond) {
5774 emitIfClause(CGF, IfCond, ThenGen,
5775 [](CodeGenFunction &, PrePostActionTy &) {});
5776 } else {
5777 RegionCodeGenTy ThenRCG(ThenGen);
5778 ThenRCG(CGF);
5779 }
5780 }
5781}
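// Editorial note: a sketch of the emitted control flow (illustrative, not
// from this file). For
//
//   #pragma omp cancel parallel if (c)
//
// the "then" branch calls __kmpc_cancel; a non-zero result branches to
// .cancel.exit, emits the cancellation barrier (parallel regions only), and
// jumps through cleanups to the region's cancel destination.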
5782
5783namespace {
5784/// Cleanup action for uses_allocators support.
5785class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5786 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5787
5788public:
5789 OMPUsesAllocatorsActionTy(
5790 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5791 : Allocators(Allocators) {}
5792 void Enter(CodeGenFunction &CGF) override {
5793 if (!CGF.HaveInsertPoint())
5794 return;
5795 for (const auto &AllocatorData : Allocators) {
5796 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5797 CGF, AllocatorData.first, AllocatorData.second);
5798 }
5799 }
5800 void Exit(CodeGenFunction &CGF) override {
5801 if (!CGF.HaveInsertPoint())
5802 return;
5803 for (const auto &AllocatorData : Allocators) {
5804 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5805 AllocatorData.first);
5806 }
5807 }
5808};
5809} // namespace
5810
5811void CGOpenMPRuntime::emitTargetOutlinedFunction(
5812 const OMPExecutableDirective &D, StringRef ParentName,
5813 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5814 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5815 assert(!ParentName.empty() && "Invalid target entry parent name!");
5816 HasEmittedTargetRegion = true;
5817 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5818 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5819 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5820 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5821 if (!D.AllocatorTraits)
5822 continue;
5823 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5824 }
5825 }
5826 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5827 CodeGen.setAction(UsesAllocatorAction);
5828 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5829 IsOffloadEntry, CodeGen);
5830}
5831
5832void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5833 const Expr *Allocator,
5834 const Expr *AllocatorTraits) {
5835 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5836 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5837 // Use default memspace handle.
5838 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5839 llvm::Value *NumTraits = llvm::ConstantInt::get(
5840 CGF.IntTy, cast<ConstantArrayType>(
5841 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5842 ->getSize()
5843 .getLimitedValue());
5844 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5845 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5846 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5847 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5848 AllocatorTraitsLVal.getBaseInfo(),
5849 AllocatorTraitsLVal.getTBAAInfo());
5850 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5851
5852 llvm::Value *AllocatorVal =
5853 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5854 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5855 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5856 // Store to allocator.
5857 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5858 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5859 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5860 AllocatorVal =
5861 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5862 Allocator->getType(), Allocator->getExprLoc());
5863 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5864}
5865
5866void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5867 const Expr *Allocator) {
5868 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5869 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5870 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5871 llvm::Value *AllocatorVal =
5872 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5873 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5874 CGF.getContext().VoidPtrTy,
5875 Allocator->getExprLoc());
5876 (void)CGF.EmitRuntimeCall(
5877 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5878 OMPRTL___kmpc_destroy_allocator),
5879 {ThreadId, AllocatorVal});
5880}
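// Editorial note: an illustrative use of the two helpers above (a sketch,
// not from this file). Assuming a user-defined allocator with traits:
//
//   omp_alloctrait_t my_traits[] = {{omp_atk_alignment, 64}};
//   omp_allocator_handle_t a;
//   #pragma omp target uses_allocators(a(my_traits))
//   { /* ... */ }
//
// __kmpc_init_allocator runs on entry to the target region with the traits
// array, and __kmpc_destroy_allocator runs on exit, bracketing the body.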
5881
5882void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5883 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5884 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5885 int32_t &MaxTeamsVal) {
5886
5887 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5888 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5889 /*UpperBoundOnly=*/true);
5890
5891 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5892 for (auto *A : C->getAttrs()) {
5893 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5894 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5895 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5896 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5897 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5898 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5899 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5900 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5901 &AttrMaxThreadsVal);
5902 else
5903 continue;
5904
5905 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5906 if (AttrMaxThreadsVal > 0)
5907 MaxThreadsVal = MaxThreadsVal > 0
5908 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5909 : AttrMaxThreadsVal;
5910 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5911 if (AttrMaxBlocksVal > 0)
5912 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5913 : AttrMaxBlocksVal;
5914 }
5915 }
5916}
5917
5918void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5919 const OMPExecutableDirective &D, StringRef ParentName,
5920 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5921 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5922
5923 llvm::TargetRegionEntryInfo EntryInfo =
5924 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5925
5926 CodeGenFunction CGF(CGM, true);
5927 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5928 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5929 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5930
5931 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5932 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5933 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5934 };
5935
5936 llvm::Error Err = OMPBuilder.emitTargetRegionFunction(
5937 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
5938 OutlinedFnID);
5939 assert(!Err && "unexpected error creating target region");
5940
5941 if (!OutlinedFn)
5942 return;
5943
5944 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5945
5946 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5947 for (auto *A : C->getAttrs()) {
5948 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5949 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5950 }
5951 }
5952}
5953
5954/// Checks if the expression is constant or does not have non-trivial function
5955/// calls.
5956 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5957 // We can skip constant expressions.
5958 // We can skip expressions with trivial calls or simple expressions.
5959 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5960 !E->hasNonTrivialCall(Ctx)) &&
5961 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5962}
5963
5964const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5965 const Stmt *Body) {
5966 const Stmt *Child = Body->IgnoreContainers();
5967 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5968 Child = nullptr;
5969 for (const Stmt *S : C->body()) {
5970 if (const auto *E = dyn_cast<Expr>(S)) {
5971 if (isTrivial(Ctx, E))
5972 continue;
5973 }
5974 // Some of the statements can be ignored.
5975 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5976 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5977 continue;
5978 // Analyze declarations.
5979 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5980 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5981 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5982 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5983 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5984 isa<UsingDirectiveDecl>(D) ||
5985 isa<OMPDeclareReductionDecl>(D) ||
5986 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
5987 return true;
5988 const auto *VD = dyn_cast<VarDecl>(D);
5989 if (!VD)
5990 return false;
5991 return VD->hasGlobalStorage() || !VD->isUsed();
5992 }))
5993 continue;
5994 }
5995 // Found multiple children - cannot get the one child only.
5996 if (Child)
5997 return nullptr;
5998 Child = S;
5999 }
6000 if (Child)
6001 Child = Child->IgnoreContainers();
6002 }
6003 return Child;
6004}
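// Editorial note: an illustrative case (not from this file). In
//
//   #pragma omp target
//   {
//     int unused;           // ignorable declaration
//     #pragma omp teams ... // the single interesting child
//   }
//
// the helper above skips the trivial statements and returns the teams
// directive, which enables the constant num_teams analysis that follows.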
6005
6006const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6007 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6008 int32_t &MaxTeamsVal) {
6009
6010 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6011 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6012 "Expected target-based executable directive.");
6013 switch (DirectiveKind) {
6014 case OMPD_target: {
6015 const auto *CS = D.getInnermostCapturedStmt();
6016 const auto *Body =
6017 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6018 const Stmt *ChildStmt =
6019 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6020 if (const auto *NestedDir =
6021 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6022 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6023 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6024 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6025 ->getNumTeams()
6026 .front();
6027 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6028 if (auto Constant =
6029 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6030 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6031 return NumTeams;
6032 }
6033 MinTeamsVal = MaxTeamsVal = 0;
6034 return nullptr;
6035 }
6036 MinTeamsVal = MaxTeamsVal = 1;
6037 return nullptr;
6038 }
6039 // A value of -1 is used to signal that no teams region needs to be emitted.
6040 MinTeamsVal = MaxTeamsVal = -1;
6041 return nullptr;
6042 }
6043 case OMPD_target_teams_loop:
6044 case OMPD_target_teams:
6045 case OMPD_target_teams_distribute:
6046 case OMPD_target_teams_distribute_simd:
6047 case OMPD_target_teams_distribute_parallel_for:
6048 case OMPD_target_teams_distribute_parallel_for_simd: {
6049 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6050 const Expr *NumTeams =
6051 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6052 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6053 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6054 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6055 return NumTeams;
6056 }
6057 MinTeamsVal = MaxTeamsVal = 0;
6058 return nullptr;
6059 }
6060 case OMPD_target_parallel:
6061 case OMPD_target_parallel_for:
6062 case OMPD_target_parallel_for_simd:
6063 case OMPD_target_parallel_loop:
6064 case OMPD_target_simd:
6065 MinTeamsVal = MaxTeamsVal = 1;
6066 return nullptr;
6067 case OMPD_parallel:
6068 case OMPD_for:
6069 case OMPD_parallel_for:
6070 case OMPD_parallel_loop:
6071 case OMPD_parallel_master:
6072 case OMPD_parallel_sections:
6073 case OMPD_for_simd:
6074 case OMPD_parallel_for_simd:
6075 case OMPD_cancel:
6076 case OMPD_cancellation_point:
6077 case OMPD_ordered:
6078 case OMPD_threadprivate:
6079 case OMPD_allocate:
6080 case OMPD_task:
6081 case OMPD_simd:
6082 case OMPD_tile:
6083 case OMPD_unroll:
6084 case OMPD_sections:
6085 case OMPD_section:
6086 case OMPD_single:
6087 case OMPD_master:
6088 case OMPD_critical:
6089 case OMPD_taskyield:
6090 case OMPD_barrier:
6091 case OMPD_taskwait:
6092 case OMPD_taskgroup:
6093 case OMPD_atomic:
6094 case OMPD_flush:
6095 case OMPD_depobj:
6096 case OMPD_scan:
6097 case OMPD_teams:
6098 case OMPD_target_data:
6099 case OMPD_target_exit_data:
6100 case OMPD_target_enter_data:
6101 case OMPD_distribute:
6102 case OMPD_distribute_simd:
6103 case OMPD_distribute_parallel_for:
6104 case OMPD_distribute_parallel_for_simd:
6105 case OMPD_teams_distribute:
6106 case OMPD_teams_distribute_simd:
6107 case OMPD_teams_distribute_parallel_for:
6108 case OMPD_teams_distribute_parallel_for_simd:
6109 case OMPD_target_update:
6110 case OMPD_declare_simd:
6111 case OMPD_declare_variant:
6112 case OMPD_begin_declare_variant:
6113 case OMPD_end_declare_variant:
6114 case OMPD_declare_target:
6115 case OMPD_end_declare_target:
6116 case OMPD_declare_reduction:
6117 case OMPD_declare_mapper:
6118 case OMPD_taskloop:
6119 case OMPD_taskloop_simd:
6120 case OMPD_master_taskloop:
6121 case OMPD_master_taskloop_simd:
6122 case OMPD_parallel_master_taskloop:
6123 case OMPD_parallel_master_taskloop_simd:
6124 case OMPD_requires:
6125 case OMPD_metadirective:
6126 case OMPD_unknown:
6127 break;
6128 default:
6129 break;
6130 }
6131 llvm_unreachable("Unexpected directive kind.");
6132}
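// Illustrative sketch (editorial, not part of the upstream file): for
//   #pragma omp target teams num_teams(8)
// the clause folds to a constant and MinTeamsVal == MaxTeamsVal == 8, while
// for
//   #pragma omp target parallel
// no teams construct is involved, so MinTeamsVal == MaxTeamsVal == 1 and
// nullptr is returned.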
6133
6134 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6135 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6136 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6137 "Clauses associated with the teams directive expected to be emitted "
6138 "only for the host!");
6139 CGBuilderTy &Bld = CGF.Builder;
6140 int32_t MinNT = -1, MaxNT = -1;
6141 const Expr *NumTeams =
6142 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6143 if (NumTeams != nullptr) {
6144 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6145
6146 switch (DirectiveKind) {
6147 case OMPD_target: {
6148 const auto *CS = D.getInnermostCapturedStmt();
6149 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6150 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6151 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6152 /*IgnoreResultAssign*/ true);
6153 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6154 /*isSigned=*/true);
6155 }
6156 case OMPD_target_teams:
6157 case OMPD_target_teams_distribute:
6158 case OMPD_target_teams_distribute_simd:
6159 case OMPD_target_teams_distribute_parallel_for:
6160 case OMPD_target_teams_distribute_parallel_for_simd: {
6161 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6162 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6163 /*IgnoreResultAssign*/ true);
6164 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6165 /*isSigned=*/true);
6166 }
6167 default:
6168 break;
6169 }
6170 }
6171
6172 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6173 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6174}
6175
6176/// Check for a num threads constant value (stored in \p UpperBound), or
6177/// expression (stored in \p E). If the value is conditional (via an if-clause),
6178/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6179/// nullptr, no expression evaluation is performed.
6180static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6181 const Expr **E, int32_t &UpperBound,
6182 bool UpperBoundOnly, llvm::Value **CondVal) {
6183 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6184 CGF.getContext(), CS->getCapturedStmt());
6185 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6186 if (!Dir)
6187 return;
6188
6189 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6190 // Handle the if clause. If present, the number of threads is
6191 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6192 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6193 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6194 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6195 const OMPIfClause *IfClause = nullptr;
6196 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6197 if (C->getNameModifier() == OMPD_unknown ||
6198 C->getNameModifier() == OMPD_parallel) {
6199 IfClause = C;
6200 break;
6201 }
6202 }
6203 if (IfClause) {
6204 const Expr *CondExpr = IfClause->getCondition();
6205 bool Result;
6206 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6207 if (!Result) {
6208 UpperBound = 1;
6209 return;
6210 }
6211 } else {
6212 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6213 if (const auto *PreInit =
6214 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6215 for (const auto *I : PreInit->decls()) {
6216 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6217 CGF.EmitVarDecl(cast<VarDecl>(*I));
6218 } else {
6219 CodeGenFunction::AutoVarEmission Emission =
6220 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6221 CGF.EmitAutoVarCleanups(Emission);
6222 }
6223 }
6224 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6225 }
6226 }
6227 }
6228 }
6229 // Check the value of the num_threads clause if the if clause was not
6230 // specified or does not evaluate to false.
6231 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6232 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6233 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6234 const auto *NumThreadsClause =
6235 Dir->getSingleClause<OMPNumThreadsClause>();
6236 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6237 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6238 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6239 UpperBound =
6240 UpperBound
6241 ? Constant->getZExtValue()
6242 : std::min(UpperBound,
6243 static_cast<int32_t>(Constant->getZExtValue()));
6244 // If we haven't found an upper bound, remember we saw a thread limiting
6245 // clause.
6246 if (UpperBound == -1)
6247 UpperBound = 0;
6248 if (!E)
6249 return;
6250 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6251 if (const auto *PreInit =
6252 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6253 for (const auto *I : PreInit->decls()) {
6254 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6255 CGF.EmitVarDecl(cast<VarDecl>(*I));
6256 } else {
6257 CodeGenFunction::AutoVarEmission Emission =
6258 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6259 CGF.EmitAutoVarCleanups(Emission);
6260 }
6261 }
6262 }
6263 *E = NTExpr;
6264 }
6265 return;
6266 }
6267 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6268 UpperBound = 1;
6269}
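// Illustrative sketch (editorial, not part of the upstream file): if the
// single child of the captured statement is
//   #pragma omp parallel if(Flag) num_threads(4)
// UpperBound becomes 4 and, when requested, *CondVal receives the emitted
// boolean for `Flag`, feeding the <cond> ? (<numthreads> ? <numthreads> : 0) : 1
// scheme described above.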
6270
6271 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6272 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6273 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6274 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6275 "Clauses associated with the teams directive expected to be emitted "
6276 "only for the host!");
6277 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6278 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6279 "Expected target-based executable directive.");
6280
6281 const Expr *NT = nullptr;
6282 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6283
6284 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6285 if (E->isIntegerConstantExpr(CGF.getContext())) {
6286 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6287 UpperBound = UpperBound ? Constant->getZExtValue()
6288 : std::min(UpperBound,
6289 int32_t(Constant->getZExtValue()));
6290 }
6291 // If we haven't found an upper bound, remember we saw a thread limiting
6292 // clause.
6293 if (UpperBound == -1)
6294 UpperBound = 0;
6295 if (EPtr)
6296 *EPtr = E;
6297 };
6298
6299 auto ReturnSequential = [&]() {
6300 UpperBound = 1;
6301 return NT;
6302 };
6303
6304 switch (DirectiveKind) {
6305 case OMPD_target: {
6306 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6307 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6308 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6309 CGF.getContext(), CS->getCapturedStmt());
6310 // TODO: The standard is not clear how to resolve two thread limit clauses,
6311 // let's pick the teams one if it's present, otherwise the target one.
6312 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6313 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6314 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6315 ThreadLimitClause = TLC;
6316 if (ThreadLimitExpr) {
6317 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6318 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6319 CodeGenFunction::LexicalScope Scope(
6320 CGF,
6321 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6322 if (const auto *PreInit =
6323 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6324 for (const auto *I : PreInit->decls()) {
6325 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6326 CGF.EmitVarDecl(cast<VarDecl>(*I));
6327 } else {
6328 CodeGenFunction::AutoVarEmission Emission =
6329 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6330 CGF.EmitAutoVarCleanups(Emission);
6331 }
6332 }
6333 }
6334 }
6335 }
6336 }
6337 if (ThreadLimitClause)
6338 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6339 ThreadLimitExpr);
6340 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6341 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6342 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6343 CS = Dir->getInnermostCapturedStmt();
6344 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6345 CGF.getContext(), CS->getCapturedStmt());
6346 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6347 }
6348 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6349 CS = Dir->getInnermostCapturedStmt();
6350 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6351 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6352 return ReturnSequential();
6353 }
6354 return NT;
6355 }
6356 case OMPD_target_teams: {
6357 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6358 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6359 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6360 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6361 ThreadLimitExpr);
6362 }
6363 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6364 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6365 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6366 CGF.getContext(), CS->getCapturedStmt());
6367 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6368 if (Dir->getDirectiveKind() == OMPD_distribute) {
6369 CS = Dir->getInnermostCapturedStmt();
6370 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6371 }
6372 }
6373 return NT;
6374 }
6375 case OMPD_target_teams_distribute:
6376 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6377 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6378 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6379 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6380 ThreadLimitExpr);
6381 }
6382 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6383 UpperBoundOnly, CondVal);
6384 return NT;
6385 case OMPD_target_teams_loop:
6386 case OMPD_target_parallel_loop:
6387 case OMPD_target_parallel:
6388 case OMPD_target_parallel_for:
6389 case OMPD_target_parallel_for_simd:
6390 case OMPD_target_teams_distribute_parallel_for:
6391 case OMPD_target_teams_distribute_parallel_for_simd: {
6392 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6393 const OMPIfClause *IfClause = nullptr;
6394 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6395 if (C->getNameModifier() == OMPD_unknown ||
6396 C->getNameModifier() == OMPD_parallel) {
6397 IfClause = C;
6398 break;
6399 }
6400 }
6401 if (IfClause) {
6402 const Expr *Cond = IfClause->getCondition();
6403 bool Result;
6404 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6405 if (!Result)
6406 return ReturnSequential();
6407 } else {
6408 CodeGenFunction::RunCleanupsScope Scope(CGF);
6409 *CondVal = CGF.EvaluateExprAsBool(Cond);
6410 }
6411 }
6412 }
6413 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6414 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6415 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6416 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6417 ThreadLimitExpr);
6418 }
6419 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6420 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6421 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6422 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6423 return NumThreadsClause->getNumThreads();
6424 }
6425 return NT;
6426 }
6427 case OMPD_target_teams_distribute_simd:
6428 case OMPD_target_simd:
6429 return ReturnSequential();
6430 default:
6431 break;
6432 }
6433 llvm_unreachable("Unsupported directive kind.");
6434}
6435
6436 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6437 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6438 llvm::Value *NumThreadsVal = nullptr;
6439 llvm::Value *CondVal = nullptr;
6440 llvm::Value *ThreadLimitVal = nullptr;
6441 const Expr *ThreadLimitExpr = nullptr;
6442 int32_t UpperBound = -1;
6443
6444 const Expr *NT = getNumThreadsExprForTargetDirective(
6445 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6446 &ThreadLimitExpr);
6447
6448 // Thread limit expressions are used below, emit them.
6449 if (ThreadLimitExpr) {
6450 ThreadLimitVal =
6451 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6452 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6453 /*isSigned=*/false);
6454 }
6455
6456 // Generate the num threads expression.
6457 if (UpperBound == 1) {
6458 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6459 } else if (NT) {
6460 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6461 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6462 /*isSigned=*/false);
6463 } else if (ThreadLimitVal) {
6464 // If we do not have a num threads value but a thread limit, replace the
6465 // former with the latter. We already handled the thread limit expression.
6466 NumThreadsVal = ThreadLimitVal;
6467 ThreadLimitVal = nullptr;
6468 } else {
6469 // Default to "0" which means runtime choice.
6470 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6471 NumThreadsVal = CGF.Builder.getInt32(0);
6472 }
6473
6474 // Handle the if clause. If present, the number of threads is
6475 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6476 if (CondVal) {
6477 CodeGenFunction::RunCleanupsScope Scope(CGF);
6478 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6479 CGF.Builder.getInt32(1));
6480 }
6481
6482 // If the thread limit and num teams expression were present, take the
6483 // minimum.
6484 if (ThreadLimitVal) {
6485 NumThreadsVal = CGF.Builder.CreateSelect(
6486 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6487 ThreadLimitVal, NumThreadsVal);
6488 }
6489
6490 return NumThreadsVal;
6491}
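// Illustrative sketch (editorial, not part of the upstream file): for
//   #pragma omp target parallel if(c) num_threads(n) thread_limit(m)
// the emitted IR is roughly
//   %nt = select i1 %c, i32 %n, i32 1
//   %nt = select (icmp ult %m, %nt), i32 %m, i32 %nt
// i.e. the if, num_threads and thread_limit clauses are combined with the
// selects generated above.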
6492
6493namespace {
6495
6496// Utility to handle information from clauses associated with a given
6497// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6498// It provides a convenient interface to obtain the information and generate
6499// code for that information.
6500class MappableExprsHandler {
6501public:
6502 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6503 static unsigned getFlagMemberOffset() {
6504 unsigned Offset = 0;
6505 for (uint64_t Remain =
6506 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6507 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6508 !(Remain & 1); Remain = Remain >> 1)
6509 Offset++;
6510 return Offset;
6511 }
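// Illustrative note (editorial, not part of the upstream file):
// OMP_MAP_MEMBER_OF occupies the high 16 bits of the 64-bit flag word, so
// counting its trailing zero bits yields an offset of 48; a MEMBER_OF(n)
// flag is then n shifted left by this offset.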
6512
6513 /// Class that holds debugging information for a data mapping to be passed to
6514 /// the runtime library.
6515 class MappingExprInfo {
6516 /// The variable declaration used for the data mapping.
6517 const ValueDecl *MapDecl = nullptr;
6518 /// The original expression used in the map clause, or null if there is
6519 /// none.
6520 const Expr *MapExpr = nullptr;
6521
6522 public:
6523 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6524 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6525
6526 const ValueDecl *getMapDecl() const { return MapDecl; }
6527 const Expr *getMapExpr() const { return MapExpr; }
6528 };
6529
6530 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6531 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6532 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6533 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6534 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6535 using MapNonContiguousArrayTy =
6536 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6537 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6538 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6539
6540 /// This structure contains combined information generated for mappable
6541 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6542 /// mappers, and non-contiguous information.
6543 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6544 MapExprsArrayTy Exprs;
6545 MapValueDeclsArrayTy Mappers;
6546 MapValueDeclsArrayTy DevicePtrDecls;
6547
6548 /// Append arrays in \a CurInfo.
6549 void append(MapCombinedInfoTy &CurInfo) {
6550 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6551 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6552 CurInfo.DevicePtrDecls.end());
6553 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6554 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6555 }
6556 };
6557
6558 /// Map between a struct and its lowest & highest elements which have been
6559 /// mapped.
6560 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6561 /// HE(FieldIndex, Pointer)}
6562 struct StructRangeInfoTy {
6563 MapCombinedInfoTy PreliminaryMapData;
6564 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6565 0, Address::invalid()};
6566 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6567 0, Address::invalid()};
6568 Address Base = Address::invalid();
6569 Address LB = Address::invalid();
6570 bool IsArraySection = false;
6571 bool HasCompleteRecord = false;
6572 };
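// Illustrative note (editorial, not part of the upstream file): after
// processing map(s.i) map(s.ps) for the struct S2 example further below,
// LowestElem records field index 0 (&s.i) and HighestElem field index 4
// (&s.ps), so the runtime can allocate the covering range of the struct.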
6573
6574private:
6575 /// Kind that defines how a device pointer has to be returned.
6576 struct MapInfo {
6577 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6578 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6579 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6580 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6581 bool ReturnDevicePointer = false;
6582 bool IsImplicit = false;
6583 const ValueDecl *Mapper = nullptr;
6584 const Expr *VarRef = nullptr;
6585 bool ForDeviceAddr = false;
6586
6587 MapInfo() = default;
6588 MapInfo(
6589 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6590 OpenMPMapClauseKind MapType,
6591 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6592 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6593 bool ReturnDevicePointer, bool IsImplicit,
6594 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6595 bool ForDeviceAddr = false)
6596 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6597 MotionModifiers(MotionModifiers),
6598 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6599 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6600 };
6601
6602 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6603 /// member and there is no map information about it, then emission of that
6604 /// entry is deferred until the whole struct has been processed.
6605 struct DeferredDevicePtrEntryTy {
6606 const Expr *IE = nullptr;
6607 const ValueDecl *VD = nullptr;
6608 bool ForDeviceAddr = false;
6609
6610 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6611 bool ForDeviceAddr)
6612 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6613 };
6614
6615 /// The target directive from where the mappable clauses were extracted. It
6616 /// is either an executable directive or a user-defined mapper directive.
6617 llvm::PointerUnion<const OMPExecutableDirective *,
6618 const OMPDeclareMapperDecl *>
6619 CurDir;
6620
6621 /// Function the directive is being generated for.
6622 CodeGenFunction &CGF;
6623
6624 /// Set of all first private variables in the current directive.
6625 /// bool data is set to true if the variable is implicitly marked as
6626 /// firstprivate, false otherwise.
6627 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6628
6629 /// Map between device pointer declarations and their expression components.
6630 /// The key value for declarations in 'this' is null.
6631 llvm::DenseMap<
6632 const ValueDecl *,
6633 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6634 DevPointersMap;
6635
6636 /// Map between device addr declarations and their expression components.
6637 /// The key value for declarations in 'this' is null.
6638 llvm::DenseMap<
6639 const ValueDecl *,
6640 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6641 HasDevAddrsMap;
6642
6643 /// Map between lambda declarations and their map type.
6644 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6645
6646 llvm::Value *getExprTypeSize(const Expr *E) const {
6647 QualType ExprTy = E->getType().getCanonicalType();
6648
6649 // Calculate the size for array shaping expression.
6650 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6651 llvm::Value *Size =
6652 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6653 for (const Expr *SE : OAE->getDimensions()) {
6654 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6655 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6656 CGF.getContext().getSizeType(),
6657 SE->getExprLoc());
6658 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6659 }
6660 return Size;
6661 }
6662
6663 // Reference types are ignored for mapping purposes.
6664 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6665 ExprTy = RefTy->getPointeeType().getCanonicalType();
6666
6667 // Given that an array section is considered a built-in type, we need to
6668 // do the calculation based on the length of the section instead of relying
6669 // on CGF.getTypeSize(E->getType()).
6670 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6671 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6672 OAE->getBase()->IgnoreParenImpCasts())
6673 .getCanonicalType();
6674
6675 // If there is no length associated with the expression and the lower bound
6676 // is not specified either, that means we are using the whole length of the
6677 // base.
6678 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6679 !OAE->getLowerBound())
6680 return CGF.getTypeSize(BaseTy);
6681
6682 llvm::Value *ElemSize;
6683 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6684 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6685 } else {
6686 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6687 assert(ATy && "Expecting array type if not a pointer type.");
6688 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6689 }
6690
6691 // If we don't have a length at this point, that is because we have an
6692 // array section with a single element.
6693 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6694 return ElemSize;
6695
6696 if (const Expr *LenExpr = OAE->getLength()) {
6697 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6698 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6699 CGF.getContext().getSizeType(),
6700 LenExpr->getExprLoc());
6701 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6702 }
6703 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6704 OAE->getLowerBound() && "expected array_section[lb:].");
6705 // Size = sizetype - lb * elemtype;
6706 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6707 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6708 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6709 CGF.getContext().getSizeType(),
6710 OAE->getLowerBound()->getExprLoc());
6711 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6712 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6713 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6714 LengthVal = CGF.Builder.CreateSelect(
6715 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6716 return LengthVal;
6717 }
6718 return CGF.getTypeSize(ExprTy);
6719 }
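// Illustrative sketch (editorial, not part of the upstream file): given
// `float *p`, the section p[1:24] yields 24 * sizeof(float); for
// `int a[10]`, a[2:] takes the lower-bound-only path and yields
// sizeof(a) - 2 * sizeof(int), clamped to zero by the select above.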
6720
6721 /// Return the corresponding bits for a given map clause modifier. Add
6722 /// a flag marking the map as a pointer if requested. Add a flag marking the
6723 /// map as the first one of a series of maps that relate to the same map
6724 /// expression.
6725 OpenMPOffloadMappingFlags getMapTypeBits(
6726 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6727 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6728 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6729 OpenMPOffloadMappingFlags Bits =
6730 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6731 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6732 switch (MapType) {
6733 case OMPC_MAP_alloc:
6734 case OMPC_MAP_release:
6735 // alloc and release is the default behavior in the runtime library, i.e.
6736 // if we don't pass any bits alloc/release that is what the runtime is
6737 // going to do. Therefore, we don't need to signal anything for these two
6738 // type modifiers.
6739 break;
6740 case OMPC_MAP_to:
6741 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6742 break;
6743 case OMPC_MAP_from:
6744 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6745 break;
6746 case OMPC_MAP_tofrom:
6747 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6748 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6749 break;
6750 case OMPC_MAP_delete:
6751 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6752 break;
6753 case OMPC_MAP_unknown:
6754 llvm_unreachable("Unexpected map type!");
6755 }
6756 if (AddPtrFlag)
6757 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6758 if (AddIsTargetParamFlag)
6759 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6760 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6761 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6762 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6763 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6764 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6765 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6766 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6767 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6768 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6769 if (IsNonContiguous)
6770 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6771 return Bits;
6772 }
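// Illustrative sketch (editorial, not part of the upstream file):
//   map(always, close, tofrom: x)
// yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, with
// OMP_MAP_TARGET_PARAM or OMP_MAP_PTR_AND_OBJ folded in when the caller
// requests them.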
6773
6774 /// Return true if the provided expression is a final array section. A
6775 /// final array section is one whose length can't be proved to be one.
6776 bool isFinalArraySectionExpression(const Expr *E) const {
6777 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6778
6779 // It is not an array section and therefore not a unity-size one.
6780 if (!OASE)
6781 return false;
6782
6783 // An array section with no colon always refers to a single element.
6784 if (OASE->getColonLocFirst().isInvalid())
6785 return false;
6786
6787 const Expr *Length = OASE->getLength();
6788
6789 // If we don't have a length we have to check if the array has size 1
6790 // for this dimension. Also, we should always expect a length if the
6791 // base type is pointer.
6792 if (!Length) {
6793 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6794 OASE->getBase()->IgnoreParenImpCasts())
6795 .getCanonicalType();
6796 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6797 return ATy->getSExtSize() != 1;
6798 // If we don't have a constant dimension length, we have to consider
6799 // the current section as having any size, so it is not necessarily
6800 // unitary. If it happens to be unity size, that's the user's fault.
6801 return true;
6802 }
6803
6804 // Check if the length evaluates to 1.
6805 Expr::EvalResult Result;
6806 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6807 return true; // Can have more than size 1.
6808
6809 llvm::APSInt ConstLength = Result.Val.getInt();
6810 return ConstLength.getSExtValue() != 1;
6811 }
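// Illustrative sketch (editorial, not part of the upstream file): for
// `int a[10]`, a[3:1] is not a final array section (constant length 1),
// while a[3:] and a[3:n] are, since their length can't be proved to be one.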
6812
6813 /// Generate the base pointers, section pointers, sizes, map type bits, and
6814 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6815 /// map type, map or motion modifiers, and expression components.
6816 /// \a IsFirstComponent should be set to true if the provided set of
6817 /// components is the first associated with a capture.
6818 void generateInfoForComponentList(
6819 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6820 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6821 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6822 MapCombinedInfoTy &CombinedInfo,
6823 MapCombinedInfoTy &StructBaseCombinedInfo,
6824 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6825 bool IsImplicit, bool GenerateAllInfoForClauses,
6826 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6827 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6828 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6829 OverlappedElements = {},
6830 bool AreBothBasePtrAndPteeMapped = false) const {
6831 // The following summarizes what has to be generated for each map and the
6832 // types below. The generated information is expressed in this order:
6833 // base pointer, section pointer, size, flags
6834 // (to add to the ones that come from the map type and modifier).
6835 //
6836 // double d;
6837 // int i[100];
6838 // float *p;
6839 // int **a = &i;
6840 //
6841 // struct S1 {
6842 // int i;
6843 // float f[50];
6844 // }
6845 // struct S2 {
6846 // int i;
6847 // float f[50];
6848 // S1 s;
6849 // double *p;
6850 // struct S2 *ps;
6851 // int &ref;
6852 // }
6853 // S2 s;
6854 // S2 *ps;
6855 //
6856 // map(d)
6857 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6858 //
6859 // map(i)
6860 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6861 //
6862 // map(i[1:23])
6863 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6864 //
6865 // map(p)
6866 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6867 //
6868 // map(p[1:24])
6869 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6870 // in unified shared memory mode or for local pointers
6871 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6872 //
6873 // map((*a)[0:3])
6874 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6875 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6876 //
6877 // map(**a)
6878 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6879 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6880 //
6881 // map(s)
6882 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6883 //
6884 // map(s.i)
6885 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6886 //
6887 // map(s.s.f)
6888 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6889 //
6890 // map(s.p)
6891 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6892 //
6893 // map(to: s.p[:22])
6894 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6895 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6896 // &(s.p), &(s.p[0]), 22*sizeof(double),
6897 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6898 // (*) alloc space for struct members, only this is a target parameter
6899 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6900 // optimizes this entry out, same in the examples below)
6901 // (***) map the pointee (map: to)
6902 //
6903 // map(to: s.ref)
6904 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6905 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6906 // (*) alloc space for struct members, only this is a target parameter
6907 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6908 // optimizes this entry out, same in the examples below)
6909 // (***) map the pointee (map: to)
6910 //
6911 // map(s.ps)
6912 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6913 //
6914 // map(from: s.ps->s.i)
6915 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6916 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6917 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6918 //
6919 // map(to: s.ps->ps)
6920 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6921 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6922 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6923 //
6924 // map(s.ps->ps->ps)
6925 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6926 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6927 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6928 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6929 //
6930 // map(to: s.ps->ps->s.f[:22])
6931 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6932 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6933 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6934 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6935 //
6936 // map(ps)
6937 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6938 //
6939 // map(ps->i)
6940 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6941 //
6942 // map(ps->s.f)
6943 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6944 //
6945 // map(from: ps->p)
6946 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6947 //
6948 // map(to: ps->p[:22])
6949 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6950 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6951 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6952 //
6953 // map(ps->ps)
6954 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6955 //
6956 // map(from: ps->ps->s.i)
6957 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6958 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6959 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6960 //
6961 // map(from: ps->ps->ps)
6962 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6963 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6964 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6965 //
6966 // map(ps->ps->ps->ps)
6967 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6968 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6969 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6970 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6971 //
6972 // map(to: ps->ps->ps->s.f[:22])
6973 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6974 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6975 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6976 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6977 //
6978 // map(to: s.f[:22]) map(from: s.p[:33])
6979 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6980 // sizeof(double*) (**), TARGET_PARAM
6981 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6982 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6983 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6984 // (*) allocate contiguous space needed to fit all mapped members even if
6985 // we allocate space for members not mapped (in this example,
6986 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
6987 // them as well because they fall between &s.f[0] and &s.p)
6988 //
6989 // map(from: s.f[:22]) map(to: ps->p[:33])
6990 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6991 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6992 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6993 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6994 // (*) the struct this entry pertains to is the 2nd element in the list of
6995 // arguments, hence MEMBER_OF(2)
6996 //
6997 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6998 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
6999 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7000 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7001 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7002 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7003 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7004 // (*) the struct this entry pertains to is the 4th element in the list
7005 // of arguments, hence MEMBER_OF(4)
7006 //
7007 // map(p, p[:100])
7008 // ===> map(p[:100])
7009 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7010
7011 // Track if the map information being generated is the first for a capture.
7012 bool IsCaptureFirstInfo = IsFirstComponentList;
7013 // When the variable is on a declare target link or in a to clause with
7014 // unified memory, a reference is needed to hold the host/device address
7015 // of the variable.
7016 bool RequiresReference = false;
7017
7018 // Scan the components from the base to the complete expression.
7019 auto CI = Components.rbegin();
7020 auto CE = Components.rend();
7021 auto I = CI;
7022
7023 // Track if the map information being generated is the first for a list of
7024 // components.
7025 bool IsExpressionFirstInfo = true;
7026 bool FirstPointerInComplexData = false;
7027 Address BP = Address::invalid();
7028 const Expr *AssocExpr = I->getAssociatedExpression();
7029 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7030 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7031 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7032
7033 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7034 return;
7035 if (isa<MemberExpr>(AssocExpr)) {
7036 // The base is the 'this' pointer. The content of the pointer is going
7037 // to be the base of the field being mapped.
7038 BP = CGF.LoadCXXThisAddress();
7039 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7040 (OASE &&
7041 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7042 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7043 } else if (OAShE &&
7044 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7045 BP = Address(
7046 CGF.EmitScalarExpr(OAShE->getBase()),
7047 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7048 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7049 } else {
7050 // The base is the reference to the variable.
7051 // BP = &Var.
7052 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7053 if (const auto *VD =
7054 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7055 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7056 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7057 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7058 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7059 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7060 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7061 RequiresReference = true;
7062 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7063 }
7064 }
7065 }
7066
7067 // If the variable is a pointer and is being dereferenced (i.e. is not
7068 // the last component), the base has to be the pointer itself, not its
7069 // reference. References are ignored for mapping purposes.
7070 QualType Ty =
7071 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7072 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7073 // No need to generate individual map information for the pointer, it
7074 // can be associated with the combined storage if shared memory mode is
7075 // active or the base declaration is not a global variable.
7076 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7077 if (!AreBothBasePtrAndPteeMapped &&
7078 (!CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7079 !VD || VD->hasLocalStorage()))
7080 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7081 else
7082 FirstPointerInComplexData = true;
7083 ++I;
7084 }
7085 }
7086
7087 // Track whether a component of the list should be marked as MEMBER_OF some
7088 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7089 // in a component list should be marked as MEMBER_OF, all subsequent entries
7090 // do not belong to the base struct. E.g.
7091 // struct S2 s;
7092 // s.ps->ps->ps->f[:]
7093 // (1) (2) (3) (4)
7094 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7095 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7096 // is the pointee of ps(2) which is not member of struct s, so it should not
7097 // be marked as such (it is still PTR_AND_OBJ).
7098 // The variable is initialized to false so that PTR_AND_OBJ entries which
7099 // are not struct members are not considered (e.g. array of pointers to
7100 // data).
7101 bool ShouldBeMemberOf = false;
7102
7103 // Variable keeping track of whether or not we have encountered a component
7104 // in the component list which is a member expression. Useful when we have a
7105 // pointer or a final array section, in which case it is the previous
7106 // component in the list which tells us whether we have a member expression.
7107 // E.g. X.f[:]
7108 // While processing the final array section "[:]" it is "f" which tells us
7109 // whether we are dealing with a member of a declared struct.
7110 const MemberExpr *EncounteredME = nullptr;
7111
7112 // Track the total number of dimensions. Start from one for the dummy
7113 // dimension.
7114 uint64_t DimSize = 1;
7115
7116 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7117 bool IsPrevMemberReference = false;
7118
7119 bool IsPartialMapped =
7120 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7121
7122 // We need to check if we will be encountering any MEs. If we do not
7123 // encounter any ME expression it means we will be mapping the whole struct.
7124 // In that case we need to skip adding an entry for the struct to the
7125 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7126 // list only when generating all info for clauses.
7127 bool IsMappingWholeStruct = true;
7128 if (!GenerateAllInfoForClauses) {
7129 IsMappingWholeStruct = false;
7130 } else {
7131 for (auto TempI = I; TempI != CE; ++TempI) {
7132 const MemberExpr *PossibleME =
7133 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7134 if (PossibleME) {
7135 IsMappingWholeStruct = false;
7136 break;
7137 }
7138 }
7139 }
7140
7141 for (; I != CE; ++I) {
7142 // If the current component is member of a struct (parent struct) mark it.
7143 if (!EncounteredME) {
7144 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7145 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7146 // as MEMBER_OF the parent struct.
7147 if (EncounteredME) {
7148 ShouldBeMemberOf = true;
7149 // Do not emit as complex pointer if this is actually not array-like
7150 // expression.
7151 if (FirstPointerInComplexData) {
7152 QualType Ty = std::prev(I)
7153 ->getAssociatedDeclaration()
7154 ->getType()
7155 .getNonReferenceType();
7156 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7157 FirstPointerInComplexData = false;
7158 }
7159 }
7160 }
7161
7162 auto Next = std::next(I);
7163
7164 // We need to generate the addresses and sizes if this is the last
7165 // component, if the component is a pointer or if it is an array section
7166 // whose length can't be proved to be one. If this is a pointer, it
7167 // becomes the base address for the following components.
7168
7169 // A final array section is one whose length can't be proved to be one.
7170 // If the map item is non-contiguous then we don't treat any array section
7171 // as final array section.
7172 bool IsFinalArraySection =
7173 !IsNonContiguous &&
7174 isFinalArraySectionExpression(I->getAssociatedExpression());
7175
7176 // If we have a declaration for the mapping use that, otherwise use
7177 // the base declaration of the map clause.
7178 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7179 ? I->getAssociatedDeclaration()
7180 : BaseDecl;
7181 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7182 : MapExpr;
7183
7184 // Get information on whether the element is a pointer. Have to do a
7185 // special treatment for array sections given that they are built-in
7186 // types.
7187 const auto *OASE =
7188 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7189 const auto *OAShE =
7190 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7191 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7192 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7193 bool IsPointer =
7194 OAShE ||
7195 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7196 .getCanonicalType()
7197 ->isAnyPointerType()) ||
7198 I->getAssociatedExpression()->getType()->isAnyPointerType();
7199 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7200 MapDecl &&
7201 MapDecl->getType()->isLValueReferenceType();
7202 bool IsNonDerefPointer = IsPointer &&
7203 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7204 !IsNonContiguous;
7205
7206 if (OASE)
7207 ++DimSize;
7208
7209 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7210 IsFinalArraySection) {
7211 // If this is not the last component, we expect the pointer to be
7212 // associated with an array expression or member expression.
7213 assert((Next == CE ||
7214 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7215 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7216 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7217 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7218 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7219 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7220 "Unexpected expression");
7221
7222 Address LB = Address::invalid();
7223 Address LowestElem = Address::invalid();
7224 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7225 const MemberExpr *E) {
7226 const Expr *BaseExpr = E->getBase();
7227 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7228 // scalar.
7229 LValue BaseLV;
7230 if (E->isArrow()) {
7231 LValueBaseInfo BaseInfo;
7232 TBAAAccessInfo TBAAInfo;
7233 Address Addr =
7234 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7235 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7236 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7237 } else {
7238 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7239 }
7240 return BaseLV;
7241 };
7242 if (OAShE) {
7243 LowestElem = LB =
7244 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7245 CGF.ConvertTypeForMem(
7246 OAShE->getBase()->getType()->getPointeeType()),
7247 CGF.getContext().getTypeAlignInChars(
7248 OAShE->getBase()->getType()));
7249 } else if (IsMemberReference) {
7250 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7251 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7252 LowestElem = CGF.EmitLValueForFieldInitialization(
7253 BaseLVal, cast<FieldDecl>(MapDecl))
7254 .getAddress();
7255 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7256 .getAddress();
7257 } else {
7258 LowestElem = LB =
7259 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7260 .getAddress();
7261 }
7262
7263 // If this component is a pointer inside the base struct then we don't
7264 // need to create any entry for it - it will be combined with the object
7265 // it is pointing to into a single PTR_AND_OBJ entry.
7266 bool IsMemberPointerOrAddr =
7267 EncounteredME &&
7268 (((IsPointer || ForDeviceAddr) &&
7269 I->getAssociatedExpression() == EncounteredME) ||
7270 (IsPrevMemberReference && !IsPointer) ||
7271 (IsMemberReference && Next != CE &&
7272 !Next->getAssociatedExpression()->getType()->isPointerType()));
7273 if (!OverlappedElements.empty() && Next == CE) {
7274 // Handle base element with the info for overlapped elements.
7275 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7276 assert(!IsPointer &&
7277 "Unexpected base element with the pointer type.");
7278 // Mark the whole struct as the struct that requires allocation on the
7279 // device.
7280 PartialStruct.LowestElem = {0, LowestElem};
7281 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7282 I->getAssociatedExpression()->getType());
7283 Address HB = CGF.Builder.CreateConstGEP(
7284 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7285 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7286 TypeSize.getQuantity() - 1);
7287 PartialStruct.HighestElem = {
7288 std::numeric_limits<decltype(
7289 PartialStruct.HighestElem.first)>::max(),
7290 HB};
7291 PartialStruct.Base = BP;
7292 PartialStruct.LB = LB;
7293 assert(
7294 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7295 "Overlapped elements must be used only once for the variable.");
7296 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7297 // Emit data for non-overlapped data.
7298 OpenMPOffloadMappingFlags Flags =
7299 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7300 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7301 /*AddPtrFlag=*/false,
7302 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7303 llvm::Value *Size = nullptr;
7304 // Do bitcopy of all non-overlapped structure elements.
7305 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7306 Component : OverlappedElements) {
7307 Address ComponentLB = Address::invalid();
7308 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7309 Component) {
7310 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7311 const auto *FD = dyn_cast<FieldDecl>(VD);
7312 if (FD && FD->getType()->isLValueReferenceType()) {
7313 const auto *ME =
7314 cast<MemberExpr>(MC.getAssociatedExpression());
7315 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7316 ComponentLB =
7317 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7318 .getAddress();
7319 } else {
7320 ComponentLB =
7321 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7322 .getAddress();
7323 }
7324 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7325 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7326 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7327 LBPtr);
7328 break;
7329 }
7330 }
7331 assert(Size && "Failed to determine structure size");
7332 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7333 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7334 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7335 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7336 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7337 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7338 Size, CGF.Int64Ty, /*isSigned=*/true));
7339 CombinedInfo.Types.push_back(Flags);
7340 CombinedInfo.Mappers.push_back(nullptr);
7341 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7342 : 1);
7343 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7344 }
7345 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7346 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7347 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7348 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7349 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7350 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7351 Size = CGF.Builder.CreatePtrDiff(
7352 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7353 LBPtr);
7354 CombinedInfo.Sizes.push_back(
7355 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7356 CombinedInfo.Types.push_back(Flags);
7357 CombinedInfo.Mappers.push_back(nullptr);
7358 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7359 : 1);
7360 break;
7361 }
7362 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7363 // Skip adding an entry in the CurInfo of this combined entry if the
7364 // whole struct is currently being mapped. The struct needs to be added
7365 // in the first position before any data internal to the struct is being
7366 // mapped.
7367 // Skip adding an entry in the CurInfo of this combined entry if the
7368 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
7369 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7370 (Next == CE && MapType != OMPC_MAP_unknown)) {
7371 if (!IsMappingWholeStruct) {
7372 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7373 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7374 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7375 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7376 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7377 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7378 Size, CGF.Int64Ty, /*isSigned=*/true));
7379 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7380 : 1);
7381 } else {
7382 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7383 StructBaseCombinedInfo.BasePointers.push_back(
7384 BP.emitRawPointer(CGF));
7385 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7386 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7387 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7388 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7389 Size, CGF.Int64Ty, /*isSigned=*/true));
7390 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7391 IsNonContiguous ? DimSize : 1);
7392 }
7393
7394 // If Mapper is valid, the last component inherits the mapper.
7395 bool HasMapper = Mapper && Next == CE;
7396 if (!IsMappingWholeStruct)
7397 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7398 else
7399 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7400 : nullptr);
7401
7402 // We need to add a pointer flag for each map that comes from the
7403 // same expression except for the first one. We also need to signal
7404 // this map is the first one that relates with the current capture
7405 // (there is a set of entries for each capture).
7406 OpenMPOffloadMappingFlags Flags =
7407 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7408 !IsExpressionFirstInfo || RequiresReference ||
7409 FirstPointerInComplexData || IsMemberReference,
7410 AreBothBasePtrAndPteeMapped ||
7411 (IsCaptureFirstInfo && !RequiresReference),
7412 IsNonContiguous);
7413
7414 if (!IsExpressionFirstInfo || IsMemberReference) {
7415 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7416 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7417 if (IsPointer || (IsMemberReference && Next != CE))
7418 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7419 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7420 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7421 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7422 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7423
7424 if (ShouldBeMemberOf) {
7425 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7426 // should be later updated with the correct value of MEMBER_OF.
7427 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7428 // From now on, all subsequent PTR_AND_OBJ entries should not be
7429 // marked as MEMBER_OF.
7430 ShouldBeMemberOf = false;
7431 }
7432 }
7433
7434 if (!IsMappingWholeStruct)
7435 CombinedInfo.Types.push_back(Flags);
7436 else
7437 StructBaseCombinedInfo.Types.push_back(Flags);
7438 }
7439
7440 // If we have encountered a member expression so far, keep track of the
7441 // mapped member. If the parent is "*this", then the value declaration
7442 // is nullptr.
7443 if (EncounteredME) {
7444 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7445 unsigned FieldIndex = FD->getFieldIndex();
7446
7447 // Update info about the lowest and highest elements for this struct
7448 if (!PartialStruct.Base.isValid()) {
7449 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7450 if (IsFinalArraySection) {
7451 Address HB =
7452 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7453 .getAddress();
7454 PartialStruct.HighestElem = {FieldIndex, HB};
7455 } else {
7456 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7457 }
7458 PartialStruct.Base = BP;
7459 PartialStruct.LB = BP;
7460 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7461 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7462 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7463 if (IsFinalArraySection) {
7464 Address HB =
7465 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7466 .getAddress();
7467 PartialStruct.HighestElem = {FieldIndex, HB};
7468 } else {
7469 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7470 }
7471 }
7472 }
7473
7474 // Need to emit combined struct for array sections.
7475 if (IsFinalArraySection || IsNonContiguous)
7476 PartialStruct.IsArraySection = true;
7477
7478 // If we have a final array section, we are done with this expression.
7479 if (IsFinalArraySection)
7480 break;
7481
7482 // The pointer becomes the base for the next element.
7483 if (Next != CE)
7484 BP = IsMemberReference ? LowestElem : LB;
7485 if (!IsPartialMapped)
7486 IsExpressionFirstInfo = false;
7487 IsCaptureFirstInfo = false;
7488 FirstPointerInComplexData = false;
7489 IsPrevMemberReference = IsMemberReference;
7490 } else if (FirstPointerInComplexData) {
7491 QualType Ty = Components.rbegin()
7492 ->getAssociatedDeclaration()
7493 ->getType()
7494 .getNonReferenceType();
7495 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7496 FirstPointerInComplexData = false;
7497 }
7498 }
7499 // If we ran into the whole component, allocate space for the whole
7500 // record.
7501 if (!EncounteredME)
7502 PartialStruct.HasCompleteRecord = true;
7503
7504 if (!IsNonContiguous)
7505 return;
7506
7507 const ASTContext &Context = CGF.getContext();
7508
7509 // To support strides in array sections, we initialize the first
7510 // dimension size as 1, the first offset as 0, and the first count as 1.
7511 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7512 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7513 MapValuesArrayTy CurStrides;
7514 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7515 uint64_t ElementTypeSize;
7516
7517 // Collect Size information for each dimension and get the element size as
7518 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7519 // should be [10, 10] and the first stride is 4 bytes.
7520 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7521 Components) {
7522 const Expr *AssocExpr = Component.getAssociatedExpression();
7523 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7524
7525 if (!OASE)
7526 continue;
7527
7528 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7529 auto *CAT = Context.getAsConstantArrayType(Ty);
7530 auto *VAT = Context.getAsVariableArrayType(Ty);
7531
7532 // We need all the dimension sizes except for the last dimension.
7533 assert((VAT || CAT || &Component == &*Components.begin()) &&
7534 "Should be either ConstantArray or VariableArray if not the "
7535 "first Component");
7536
7537 // Get element size if CurStrides is empty.
7538 if (CurStrides.empty()) {
7539 const Type *ElementType = nullptr;
7540 if (CAT)
7541 ElementType = CAT->getElementType().getTypePtr();
7542 else if (VAT)
7543 ElementType = VAT->getElementType().getTypePtr();
7544 else
7545 assert(&Component == &*Components.begin() &&
7546 "Only expect pointer (non CAT or VAT) when this is the "
7547 "first Component");
7548 // If ElementType is null, then it means the base is a pointer
7549 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7550 // on the next iteration.
7551 if (ElementType) {
7552 // For the case that having pointer as base, we need to remove one
7553 // level of indirection.
7554 if (&Component != &*Components.begin())
7555 ElementType = ElementType->getPointeeOrArrayElementType();
7556 ElementTypeSize =
7557 Context.getTypeSizeInChars(ElementType).getQuantity();
7558 CurStrides.push_back(
7559 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7560 }
7561 }
7562 // Get the dimension size, except for the last dimension, since we
7563 // don't need it.
7564 if (DimSizes.size() < Components.size() - 1) {
7565 if (CAT)
7566 DimSizes.push_back(
7567 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7568 else if (VAT)
7569 DimSizes.push_back(CGF.Builder.CreateIntCast(
7570 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7571 /*IsSigned=*/false));
7572 }
7573 }
7574
7575 // Skip the dummy dimension since we already have its information.
7576 auto *DI = DimSizes.begin() + 1;
7577 // Running product of dimension sizes (scaled by the element size).
7578 llvm::Value *DimProd =
7579 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7580
7581 // Collect info for non-contiguous. Notice that offset, count, and stride
7582 // are only meaningful for array-section, so we insert a null for anything
7583 // other than array-section.
7584 // Also, the sizes of the offset, count, and stride arrays differ from
7585 // those of pointers, base_pointers, sizes, and dims. Instead, they
7586 // match the number of non-contiguous declarations in the target update
7587 // to/from clause.
7588 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7589 Components) {
7590 const Expr *AssocExpr = Component.getAssociatedExpression();
7591
7592 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7593 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7594 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7595 /*isSigned=*/false);
7596 CurOffsets.push_back(Offset);
7597 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7598 CurStrides.push_back(CurStrides.back());
7599 continue;
7600 }
7601
7602 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7603
7604 if (!OASE)
7605 continue;
7606
7607 // Offset
7608 const Expr *OffsetExpr = OASE->getLowerBound();
7609 llvm::Value *Offset = nullptr;
7610 if (!OffsetExpr) {
7611 // If offset is absent, then we just set it to zero.
7612 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7613 } else {
7614 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7615 CGF.Int64Ty,
7616 /*isSigned=*/false);
7617 }
7618 CurOffsets.push_back(Offset);
7619
7620 // Count
7621 const Expr *CountExpr = OASE->getLength();
7622 llvm::Value *Count = nullptr;
7623 if (!CountExpr) {
7624 // In Clang, once a higher dimension is an array section, all lower
7625 // dimensions are constructed as array sections too. However, for a case
7626 // like arr[0:2][2], Clang constructs the inner dimension as an array
7627 // section even though it is not one in array-section form per the spec.
7628 if (!OASE->getColonLocFirst().isValid() &&
7629 !OASE->getColonLocSecond().isValid()) {
7630 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7631 } else {
7632 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7633 // When the length is absent it defaults to ⌈(size −
7634 // lower-bound)/stride⌉, where size is the size of the array
7635 // dimension.
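// For example (an illustrative check, not from the source): for `int a[10]`
// and the section `a[2::3]`, the default length is ceil((10 - 2) / 3) = 3.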
7636 const Expr *StrideExpr = OASE->getStride();
7637 llvm::Value *Stride =
7638 StrideExpr
7639 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7640 CGF.Int64Ty, /*isSigned=*/false)
7641 : nullptr;
7642 if (Stride)
7643 Count = CGF.Builder.CreateUDiv(
7644 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7645 else
7646 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7647 }
7648 } else {
7649 Count = CGF.EmitScalarExpr(CountExpr);
7650 }
7651 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7652 CurCounts.push_back(Count);
7653
7654 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7655 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7656 //      Offset  Count  Stride
7657 // D0   0       1      4    (int)               <- dummy dimension
7658 // D1   0       2      8    (2 * (1) * 4)
7659 // D2   1       2      20   (1 * (1 * 5) * 4)
7660 // D3   0       2      200  (2 * (1 * 5 * 5) * 4)
7661 const Expr *StrideExpr = OASE->getStride();
7662 llvm::Value *Stride =
7663 StrideExpr
7664 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7665 CGF.Int64Ty, /*isSigned=*/false)
7666 : nullptr;
7667 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7668 if (Stride)
7669 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7670 else
7671 CurStrides.push_back(DimProd);
7672 if (DI != DimSizes.end())
7673 ++DI;
7674 }
7675
7676 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7677 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7678 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7679 }
7680
7681 /// Return the adjusted map modifiers if the declaration a capture refers to
7682 /// appears in a first-private clause. This is expected to be used only with
7683 /// directives that start with 'target'.
7684 OpenMPOffloadMappingFlags
7685 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7686 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7687
7688 // A first private variable captured by reference will use only the
7689 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7690 // declaration is known as first-private in this handler.
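// Illustrative flag choice (mirroring the checks below, not from the
// source): a pointer-typed firstprivate capture gets TO | PTR_AND_OBJ,
// while any other firstprivate capture gets PRIVATE | TO.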
7691 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7692 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7693 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7694 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7695 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7696 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7697 }
7698 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7699 if (I != LambdasMap.end())
7700 // For map(to: lambda): use the user-specified map type.
7701 return getMapTypeBits(
7702 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7703 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7704 /*AddPtrFlag=*/false,
7705 /*AddIsTargetParamFlag=*/false,
7706 /*isNonContiguous=*/false);
7707 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7708 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7709 }
7710
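// A sketch of what getPlainLayout below computes (illustrative types, not
// from the source): for
//   struct A { int a; }; struct B : A { int b; };
// the resulting Layout is [A::a, B::b]; bases are flattened in LLVM field
// order, and empty bases and bitfields are skipped.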
7711 void getPlainLayout(const CXXRecordDecl *RD,
7712 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7713 bool AsBase) const {
7714 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7715
7716 llvm::StructType *St =
7717 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7718
7719 unsigned NumElements = St->getNumElements();
7720 llvm::SmallVector<
7721 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7722 RecordLayout(NumElements);
7723
7724 // Fill bases.
7725 for (const auto &I : RD->bases()) {
7726 if (I.isVirtual())
7727 continue;
7728
7729 QualType BaseTy = I.getType();
7730 const auto *Base = BaseTy->getAsCXXRecordDecl();
7731 // Ignore empty bases.
7732 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7733 CGF.getContext()
7734 .getASTRecordLayout(RD)
7735 .getBaseClassOffset(Base)
7736 .isZero())
7737 continue;
7738
7739 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7740 RecordLayout[FieldIndex] = Base;
7741 }
7742 // Fill in virtual bases.
7743 for (const auto &I : RD->vbases()) {
7744 QualType BaseTy = I.getType();
7745 // Ignore empty bases.
7746 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7747 continue;
7748
7749 const auto *Base = BaseTy->getAsCXXRecordDecl();
7750 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7751 if (RecordLayout[FieldIndex])
7752 continue;
7753 RecordLayout[FieldIndex] = Base;
7754 }
7755 // Fill in all the fields.
7756 assert(!RD->isUnion() && "Unexpected union.");
7757 for (const auto *Field : RD->fields()) {
7758 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7759 // will fill in later.)
7760 if (!Field->isBitField() &&
7761 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7762 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7763 RecordLayout[FieldIndex] = Field;
7764 }
7765 }
7766 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7767 &Data : RecordLayout) {
7768 if (Data.isNull())
7769 continue;
7770 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7771 getPlainLayout(Base, Layout, /*AsBase=*/true);
7772 else
7773 Layout.push_back(cast<const FieldDecl *>(Data));
7774 }
7775 }
7776
7777 /// Generate all the base pointers, section pointers, sizes, map types, and
7778 /// mappers for the extracted mappable expressions (all included in \a
7779 /// CombinedInfo). Also, for each item that relates with a device pointer, a
7780 /// pair of the relevant declaration and index where it occurs is appended to
7781 /// the device pointers info array.
7782 void generateAllInfoForClauses(
7783 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7784 llvm::OpenMPIRBuilder &OMPBuilder,
7785 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7786 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7787 // We have to process the component lists that relate with the same
7788 // declaration in a single chunk so that we can generate the map flags
7789 // correctly. Therefore, we organize all lists in a map.
7790 enum MapKind { Present, Allocs, Other, Total };
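// The buckets are visited in enum order below, so 'present' entries are
// emitted before 'alloc' entries, and both before everything else; Total
// is only the bucket count.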
7791 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7792 SmallVector<SmallVector<MapInfo, 8>, 4>>
7793 Info;
7794
7795 // Helper function to fill the information map for the different supported
7796 // clauses.
7797 auto &&InfoGen =
7798 [&Info, &SkipVarSet](
7799 const ValueDecl *D, MapKind Kind,
7800 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7801 OpenMPMapClauseKind MapType,
7802 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7803 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7804 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7805 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7806 if (SkipVarSet.contains(D))
7807 return;
7808 auto It = Info.try_emplace(D, Total).first;
7809 It->second[Kind].emplace_back(
7810 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7811 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7812 };
7813
7814 for (const auto *Cl : Clauses) {
7815 const auto *C = dyn_cast<OMPMapClause>(Cl);
7816 if (!C)
7817 continue;
7818 MapKind Kind = Other;
7819 if (llvm::is_contained(C->getMapTypeModifiers(),
7820 OMPC_MAP_MODIFIER_present))
7821 Kind = Present;
7822 else if (C->getMapType() == OMPC_MAP_alloc)
7823 Kind = Allocs;
7824 const auto *EI = C->getVarRefs().begin();
7825 for (const auto L : C->component_lists()) {
7826 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7827 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7828 C->getMapTypeModifiers(), {},
7829 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7830 E);
7831 ++EI;
7832 }
7833 }
7834 for (const auto *Cl : Clauses) {
7835 const auto *C = dyn_cast<OMPToClause>(Cl);
7836 if (!C)
7837 continue;
7838 MapKind Kind = Other;
7839 if (llvm::is_contained(C->getMotionModifiers(),
7840 OMPC_MOTION_MODIFIER_present))
7841 Kind = Present;
7842 const auto *EI = C->getVarRefs().begin();
7843 for (const auto L : C->component_lists()) {
7844 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
7845 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7846 C->isImplicit(), std::get<2>(L), *EI);
7847 ++EI;
7848 }
7849 }
7850 for (const auto *Cl : Clauses) {
7851 const auto *C = dyn_cast<OMPFromClause>(Cl);
7852 if (!C)
7853 continue;
7854 MapKind Kind = Other;
7855 if (llvm::is_contained(C->getMotionModifiers(),
7856 OMPC_MOTION_MODIFIER_present))
7857 Kind = Present;
7858 const auto *EI = C->getVarRefs().begin();
7859 for (const auto L : C->component_lists()) {
7860 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
7861 C->getMotionModifiers(),
7862 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7863 *EI);
7864 ++EI;
7865 }
7866 }
7867
7868 // Look at the use_device_ptr and use_device_addr clauses information and
7869 // mark the existing map entries as such. If there is no map information for
7870 // an entry in the use_device_ptr and use_device_addr list, we create one
7871 // with map type 'alloc' and zero size section. It is the user's fault if that
7872 // was not mapped before. If there is no map information and the pointer is
7873 // a struct member, then we defer the emission of that entry until the whole
7874 // struct has been processed.
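// A sketch of the two situations (illustrative directives, not from the
// source):
//   #pragma omp target data map(tofrom: p[0:N]) use_device_ptr(p)
// marks the existing map entry for 'p' as returning the device pointer,
// whereas
//   #pragma omp target data use_device_ptr(q)
// with no prior map of 'q' yields the zero-size 'alloc'-style entry
// described above.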
7875 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7876 SmallVector<DeferredDevicePtrEntryTy, 4>>
7877 DeferredInfo;
7878 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7879
7880 auto &&UseDeviceDataCombinedInfoGen =
7881 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7882 CodeGenFunction &CGF, bool IsDevAddr) {
7883 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7884 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7885 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7886 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7887 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7888 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7889 UseDeviceDataCombinedInfo.Sizes.push_back(
7890 llvm::Constant::getNullValue(CGF.Int64Ty));
7891 UseDeviceDataCombinedInfo.Types.push_back(
7892 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7893 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7894 };
7895
7896 auto &&MapInfoGen =
7897 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7898 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7899 OMPClauseMappableExprCommon::MappableExprComponentListRef
7900 Components,
7901 bool IsImplicit, bool IsDevAddr) {
7902 // We didn't find any match in our map information; generate a zero
7903 // size array section. If the pointer is a struct member, we defer
7904 // this action until the whole struct has been processed.
7905 if (isa<MemberExpr>(IE)) {
7906 // Insert the pointer into Info to be processed by
7907 // generateInfoForComponentList. Because it is a member pointer
7908 // without a pointee, no entry will be generated for it, therefore
7909 // we need to generate one after the whole struct has been
7910 // processed. Nonetheless, generateInfoForComponentList must be
7911 // called to take the pointer into account for the calculation of
7912 // the range of the partial struct.
7913 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
7914 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
7915 IsDevAddr);
7916 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7917 } else {
7918 llvm::Value *Ptr;
7919 if (IsDevAddr) {
7920 if (IE->isGLValue())
7921 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7922 else
7923 Ptr = CGF.EmitScalarExpr(IE);
7924 } else {
7925 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7926 }
7927 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7928 }
7929 };
7930
7931 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7932 const Expr *IE, bool IsDevAddr) -> bool {
7933 // We potentially have map information for this declaration already.
7934 // Look for the first set of components that refer to it. If found,
7935 // return true.
7936 // If the first component is a member expression, we have to look into
7937 // 'this', which maps to null in the map of map information. Otherwise
7938 // look directly for the information.
7939 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7940 if (It != Info.end()) {
7941 bool Found = false;
7942 for (auto &Data : It->second) {
7943 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7944 return MI.Components.back().getAssociatedDeclaration() == VD;
7945 });
7946 // If we found a map entry, signal that the pointer has to be
7947 // returned and move on to the next declaration. Exclude cases where
7948 // the base pointer is mapped as array subscript, array section or
7949 // array shaping. The base address is passed as a pointer to base in
7950 // this case and cannot be used as a base for use_device_ptr list
7951 // item.
7952 if (CI != Data.end()) {
7953 if (IsDevAddr) {
7954 CI->ForDeviceAddr = IsDevAddr;
7955 CI->ReturnDevicePointer = true;
7956 Found = true;
7957 break;
7958 } else {
7959 auto PrevCI = std::next(CI->Components.rbegin());
7960 const auto *VarD = dyn_cast<VarDecl>(VD);
7961 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7962 isa<MemberExpr>(IE) ||
7963 !VD->getType().getNonReferenceType()->isPointerType() ||
7964 PrevCI == CI->Components.rend() ||
7965 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7966 VarD->hasLocalStorage()) {
7967 CI->ForDeviceAddr = IsDevAddr;
7968 CI->ReturnDevicePointer = true;
7969 Found = true;
7970 break;
7971 }
7972 }
7973 }
7974 }
7975 return Found;
7976 }
7977 return false;
7978 };
7979
7980 // Look at the use_device_ptr clause information and mark the existing map
7981 // entries as such. If there is no map information for an entry in the
7982 // use_device_ptr list, we create one with map type 'alloc' and zero size
7983 // section. It is the user's fault if that was not mapped before. If there is
7984 // no map information and the pointer is a struct member, then we defer the
7985 // emission of that entry until the whole struct has been processed.
7986 for (const auto *Cl : Clauses) {
7987 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7988 if (!C)
7989 continue;
7990 for (const auto L : C->component_lists()) {
7991 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7992 std::get<1>(L);
7993 assert(!Components.empty() &&
7994 "Not expecting empty list of components!");
7995 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7996 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7997 const Expr *IE = Components.back().getAssociatedExpression();
7998 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
7999 continue;
8000 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8001 /*IsDevAddr=*/false);
8002 }
8003 }
8004
8005 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8006 for (const auto *Cl : Clauses) {
8007 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8008 if (!C)
8009 continue;
8010 for (const auto L : C->component_lists()) {
8011 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8012 std::get<1>(L);
8013 assert(!std::get<1>(L).empty() &&
8014 "Not expecting empty list of components!");
8015 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8016 if (!Processed.insert(VD).second)
8017 continue;
8018 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8019 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8020 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8021 continue;
8022 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8023 /*IsDevAddr=*/true);
8024 }
8025 }
8026
8027 for (const auto &Data : Info) {
8028 StructRangeInfoTy PartialStruct;
8029 // Current struct information:
8030 MapCombinedInfoTy CurInfo;
8031 // Current struct base information:
8032 MapCombinedInfoTy StructBaseCurInfo;
8033 const Decl *D = Data.first;
8034 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8035 bool HasMapBasePtr = false;
8036 bool HasMapArraySec = false;
8037 if (VD && VD->getType()->isAnyPointerType()) {
8038 for (const auto &M : Data.second) {
8039 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8040 return isa_and_present<DeclRefExpr>(L.VarRef);
8041 });
8042 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8043 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8044 L.VarRef);
8045 });
8046 if (HasMapBasePtr && HasMapArraySec)
8047 break;
8048 }
8049 }
8050 for (const auto &M : Data.second) {
8051 for (const MapInfo &L : M) {
8052 assert(!L.Components.empty() &&
8053 "Not expecting declaration with no component lists.");
8054
8055 // Remember the current base pointer index.
8056 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8057 unsigned StructBasePointersIdx =
8058 StructBaseCurInfo.BasePointers.size();
8059 CurInfo.NonContigInfo.IsNonContiguous =
8060 L.Components.back().isNonContiguous();
8061 generateInfoForComponentList(
8062 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8063 CurInfo, StructBaseCurInfo, PartialStruct,
8064 /*IsFirstComponentList=*/false, L.IsImplicit,
8065 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8066 L.VarRef, /*OverlappedElements*/ {},
8067 HasMapBasePtr && HasMapArraySec);
8068
8069 // If this entry relates to a device pointer, set the relevant
8070 // declaration and add the 'return pointer' flag.
8071 if (L.ReturnDevicePointer) {
8072 // Check whether a value was added to either CurInfo or
8073 // StructBaseCurInfo and error if no value was added to either of
8074 // them:
8075 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8076 StructBasePointersIdx <
8077 StructBaseCurInfo.BasePointers.size()) &&
8078 "Unexpected number of mapped base pointers.");
8079
8080 // Choose a base pointer index which is always valid:
8081 const ValueDecl *RelevantVD =
8082 L.Components.back().getAssociatedDeclaration();
8083 assert(RelevantVD &&
8084 "No relevant declaration related with device pointer??");
8085
8086 // If StructBaseCurInfo has been updated this iteration then work on
8087 // the first new entry added to it i.e. make sure that when multiple
8088 // values are added to any of the lists, the first value added is
8089 // being modified by the assignments below (not the last value
8090 // added).
8091 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8092 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8093 RelevantVD;
8094 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8095 L.ForDeviceAddr ? DeviceInfoTy::Address
8096 : DeviceInfoTy::Pointer;
8097 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8098 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8099 } else {
8100 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8101 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8102 L.ForDeviceAddr ? DeviceInfoTy::Address
8103 : DeviceInfoTy::Pointer;
8104 CurInfo.Types[CurrentBasePointersIdx] |=
8105 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8106 }
8107 }
8108 }
8109 }
8110
8111 // Append any pending zero-length pointers which are struct members and
8112 // used with use_device_ptr or use_device_addr.
8113 auto CI = DeferredInfo.find(Data.first);
8114 if (CI != DeferredInfo.end()) {
8115 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8116 llvm::Value *BasePtr;
8117 llvm::Value *Ptr;
8118 if (L.ForDeviceAddr) {
8119 if (L.IE->isGLValue())
8120 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8121 else
8122 Ptr = this->CGF.EmitScalarExpr(L.IE);
8123 BasePtr = Ptr;
8124 // Entry is RETURN_PARAM. Also, set the placeholder value
8125 // MEMBER_OF=FFFF so that the entry is later updated with the
8126 // correct value of MEMBER_OF.
8127 CurInfo.Types.push_back(
8128 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8129 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8130 } else {
8131 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8132 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8133 L.IE->getExprLoc());
8134 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8135 // placeholder value MEMBER_OF=FFFF so that the entry is later
8136 // updated with the correct value of MEMBER_OF.
8137 CurInfo.Types.push_back(
8138 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8139 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8140 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8141 }
8142 CurInfo.Exprs.push_back(L.VD);
8143 CurInfo.BasePointers.emplace_back(BasePtr);
8144 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8145 CurInfo.DevicePointers.emplace_back(
8146 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8147 CurInfo.Pointers.push_back(Ptr);
8148 CurInfo.Sizes.push_back(
8149 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8150 CurInfo.Mappers.push_back(nullptr);
8151 }
8152 }
8153
8154 // Unify entries in one list making sure the struct mapping precedes the
8155 // individual fields:
8156 MapCombinedInfoTy UnionCurInfo;
8157 UnionCurInfo.append(StructBaseCurInfo);
8158 UnionCurInfo.append(CurInfo);
8159
8160 // If there is an entry in PartialStruct it means we have a struct with
8161 // individual members mapped. Emit an extra combined entry.
8162 if (PartialStruct.Base.isValid()) {
8163 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8164 // Emit a combined entry:
8165 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8166 /*IsMapThis*/ !VD, OMPBuilder, VD);
8167 }
8168
8169 // We need to append the results of this capture to what we already have.
8170 CombinedInfo.append(UnionCurInfo);
8171 }
8172 // Append data for use_device_ptr clauses.
8173 CombinedInfo.append(UseDeviceDataCombinedInfo);
8174 }
8175
8176public:
8177 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8178 : CurDir(&Dir), CGF(CGF) {
8179 // Extract firstprivate clause information.
8180 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8181 for (const auto *D : C->varlist())
8182 FirstPrivateDecls.try_emplace(
8183 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8184 // Extract implicit firstprivates from uses_allocators clauses.
8185 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8186 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8187 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8188 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8189 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8190 /*Implicit=*/true);
8191 else if (const auto *VD = dyn_cast<VarDecl>(
8192 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8193 ->getDecl()))
8194 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8195 }
8196 }
8197 // Extract device pointer clause information.
8198 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8199 for (auto L : C->component_lists())
8200 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8201 // Extract device addr clause information.
8202 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8203 for (auto L : C->component_lists())
8204 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8205 // Extract map information.
8206 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8207 if (C->getMapType() != OMPC_MAP_to)
8208 continue;
8209 for (auto L : C->component_lists()) {
8210 const ValueDecl *VD = std::get<0>(L);
8211 const auto *RD = VD ? VD->getType()
8212 .getCanonicalType()
8213 .getNonReferenceType()
8214 ->getAsCXXRecordDecl()
8215 : nullptr;
8216 if (RD && RD->isLambda())
8217 LambdasMap.try_emplace(std::get<0>(L), C);
8218 }
8219 }
8220 }
8221
8222 /// Constructor for the declare mapper directive.
8223 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8224 : CurDir(&Dir), CGF(CGF) {}
8225
8226 /// Generate code for the combined entry if we have a partially mapped struct
8227 /// and take care of the mapping flags of the arguments corresponding to
8228 /// individual struct members.
8229 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8230 MapFlagsArrayTy &CurTypes,
8231 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8232 llvm::OpenMPIRBuilder &OMPBuilder,
8233 const ValueDecl *VD = nullptr,
8234 bool NotTargetParams = true) const {
8235 if (CurTypes.size() == 1 &&
8236 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8237 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8238 !PartialStruct.IsArraySection)
8239 return;
8240 Address LBAddr = PartialStruct.LowestElem.second;
8241 Address HBAddr = PartialStruct.HighestElem.second;
8242 if (PartialStruct.HasCompleteRecord) {
8243 LBAddr = PartialStruct.LB;
8244 HBAddr = PartialStruct.LB;
8245 }
8246 CombinedInfo.Exprs.push_back(VD);
8247 // Base is the base of the struct
8248 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8249 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8250 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8251 // Pointer is the address of the lowest element
8252 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8253 const CXXMethodDecl *MD =
8254 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8255 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8256 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8257 // There should not be a mapper for a combined entry.
8258 if (HasBaseClass) {
8259 // OpenMP 5.2 148:21:
8260 // If the target construct is within a class non-static member function,
8261 // and a variable is an accessible data member of the object for which the
8262 // non-static data member function is invoked, the variable is treated as
8263 // if the this[:1] expression had appeared in a map clause with a map-type
8264 // of tofrom.
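// Illustrative example (not from the source): given
//   struct S { int a; void f() { #pragma omp target map(a) { ... } } };
// the map of 'a' inside f() behaves as if map(tofrom: this[:1]) had been
// written.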
8265 // Emit this[:1]
8266 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8267 QualType Ty = MD->getFunctionObjectParameterType();
8268 llvm::Value *Size =
8269 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8270 /*isSigned=*/true);
8271 CombinedInfo.Sizes.push_back(Size);
8272 } else {
8273 CombinedInfo.Pointers.push_back(LB);
8274 // Size is (addr of {highest+1} element) - (addr of lowest element)
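// Illustrative computation (not from the source): for struct {int a,b,c;}
// with s.a and s.c mapped, LB = &s.a, HB = &s.c, and the size computed
// below is (char *)(&s.c + 1) - (char *)&s.a.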
8275 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8276 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8277 HBAddr.getElementType(), HB, /*Idx0=*/1);
8278 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8279 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8280 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8281 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8282 /*isSigned=*/false);
8283 CombinedInfo.Sizes.push_back(Size);
8284 }
8285 CombinedInfo.Mappers.push_back(nullptr);
8286 // The map type is always TARGET_PARAM when generating info for captures.
8287 CombinedInfo.Types.push_back(
8288 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8289 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8290 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8291 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8292 // If any element has the present modifier, then make sure the runtime
8293 // doesn't attempt to allocate the struct.
8294 if (CurTypes.end() !=
8295 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8296 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8297 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8298 }))
8299 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8300 // Remove TARGET_PARAM flag from the first element
8301 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8302 // If any element has the ompx_hold modifier, then make sure the runtime
8303 // uses the hold reference count for the struct as a whole so that it won't
8304 // be unmapped by an extra dynamic reference count decrement. Add it to all
8305 // elements as well so the runtime knows which reference count to check
8306 // when determining whether it's time for device-to-host transfers of
8307 // individual elements.
8308 if (CurTypes.end() !=
8309 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8310 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8311 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8312 })) {
8313 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8314 for (auto &M : CurTypes)
8315 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8316 }
8317
8318 // All other current entries will be MEMBER_OF the combined entry
8319 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8320 // 0xFFFF in the MEMBER_OF field).
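// A sketch of the encoding, assuming the OpenMPIRBuilder convention that
// MEMBER_OF occupies the 16 most-significant flag bits:
//   getMemberOfFlag(Position) ~ (Position + 1) << 48
// so the 0xFFFF placeholder is later replaced by the 1-based index of the
// combined entry.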
8321 OpenMPOffloadMappingFlags MemberOfFlag =
8322 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8323 for (auto &M : CurTypes)
8324 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8325 }
8326
8327 /// Generate all the base pointers, section pointers, sizes, map types, and
8328 /// mappers for the extracted mappable expressions (all included in \a
8329 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8330 /// pair of the relevant declaration and index where it occurs is appended to
8331 /// the device pointers info array.
8332 void generateAllInfo(
8333 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8334 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8335 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8336 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8337 "Expect a executable directive");
8338 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8339 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8340 SkipVarSet);
8341 }
8342
8343 /// Generate all the base pointers, section pointers, sizes, map types, and
8344 /// mappers for the extracted map clauses of user-defined mapper (all included
8345 /// in \a CombinedInfo).
8346 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8347 llvm::OpenMPIRBuilder &OMPBuilder) const {
8348 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8349 "Expect a declare mapper directive");
8350 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8351 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8352 OMPBuilder);
8353 }
8354
8355 /// Emit capture info for lambdas for variables captured by reference.
8356 void generateInfoForLambdaCaptures(
8357 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8358 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8359 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8360 const auto *RD = VDType->getAsCXXRecordDecl();
8361 if (!RD || !RD->isLambda())
8362 return;
8363 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8364 CGF.getContext().getDeclAlign(VD));
8365 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8366 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8367 FieldDecl *ThisCapture = nullptr;
8368 RD->getCaptureFields(Captures, ThisCapture);
8369 if (ThisCapture) {
8370 LValue ThisLVal =
8371 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8372 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8373 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8374 VDLVal.getPointer(CGF));
8375 CombinedInfo.Exprs.push_back(VD);
8376 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8377 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8378 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8379 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8380 CombinedInfo.Sizes.push_back(
8381 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8382 CGF.Int64Ty, /*isSigned=*/true));
8383 CombinedInfo.Types.push_back(
8384 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8385 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8386 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8387 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8388 CombinedInfo.Mappers.push_back(nullptr);
8389 }
8390 for (const LambdaCapture &LC : RD->captures()) {
8391 if (!LC.capturesVariable())
8392 continue;
8393 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8394 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8395 continue;
8396 auto It = Captures.find(VD);
8397 assert(It != Captures.end() && "Found lambda capture without field.");
8398 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8399 if (LC.getCaptureKind() == LCK_ByRef) {
8400 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8401 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8402 VDLVal.getPointer(CGF));
8403 CombinedInfo.Exprs.push_back(VD);
8404 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8405 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8406 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8407 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8408 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8409 CGF.getTypeSize(
8410 VD->getType().getCanonicalType().getNonReferenceType()),
8411 CGF.Int64Ty, /*isSigned=*/true));
8412 } else {
8413 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8414 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8415 VDLVal.getPointer(CGF));
8416 CombinedInfo.Exprs.push_back(VD);
8417 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8418 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8419 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8420 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8421 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8422 }
8423 CombinedInfo.Types.push_back(
8424 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8425 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8426 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8427 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8428 CombinedInfo.Mappers.push_back(nullptr);
8429 }
8430 }
8431
8432 /// Set correct indices for lambdas captures.
8433 void adjustMemberOfForLambdaCaptures(
8434 llvm::OpenMPIRBuilder &OMPBuilder,
8435 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8436 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8437 MapFlagsArrayTy &Types) const {
8438 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8439 // Set correct member_of idx for all implicit lambda captures.
8440 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8441 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8442 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8443 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8444 continue;
8445 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8446 assert(BasePtr && "Unable to find base lambda address.");
8447 int TgtIdx = -1;
8448 for (unsigned J = I; J > 0; --J) {
8449 unsigned Idx = J - 1;
8450 if (Pointers[Idx] != BasePtr)
8451 continue;
8452 TgtIdx = Idx;
8453 break;
8454 }
8455 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8456 // All other current entries will be MEMBER_OF the combined entry
8457 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8458 // 0xFFFF in the MEMBER_OF field).
8459 OpenMPOffloadMappingFlags MemberOfFlag =
8460 OMPBuilder.getMemberOfFlag(TgtIdx);
8461 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8462 }
8463 }
8464
8465 /// Generate the base pointers, section pointers, sizes, map types, and
8466 /// mappers associated to a given capture (all included in \a CombinedInfo).
8467 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8468 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8469 StructRangeInfoTy &PartialStruct) const {
8470 assert(!Cap->capturesVariableArrayType() &&
8471 "Not expecting to generate map info for a variable array type!");
8472
8473 // We need to know when we are generating information for the first component.
8474 const ValueDecl *VD = Cap->capturesThis()
8475 ? nullptr
8476 : Cap->getCapturedVar()->getCanonicalDecl();
8477
8478 // For map(to: lambda): skip it here; it is processed in
8479 // generateDefaultMapInfo.
8480 if (LambdasMap.count(VD))
8481 return;
8482
8483 // If this declaration appears in a is_device_ptr clause we just have to
8484 // pass the pointer by value. If it is a reference to a declaration, we just
8485 // pass its value.
8486 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8487 CombinedInfo.Exprs.push_back(VD);
8488 CombinedInfo.BasePointers.emplace_back(Arg);
8489 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8490 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8491 CombinedInfo.Pointers.push_back(Arg);
8492 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8493 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8494 /*isSigned=*/true));
8495 CombinedInfo.Types.push_back(
8496 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8497 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8498 CombinedInfo.Mappers.push_back(nullptr);
8499 return;
8500 }
8501
8502 using MapData =
8503 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8504 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8505 const ValueDecl *, const Expr *>;
8506 SmallVector<MapData, 4> DeclComponentLists;
8507 // For member fields listed in is_device_ptr, store them in
8508 // DeclComponentLists for generating component info.
8509 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8510 auto It = DevPointersMap.find(VD);
8511 if (It != DevPointersMap.end())
8512 for (const auto &MCL : It->second)
8513 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8514 /*IsImplicit=*/true, nullptr,
8515 nullptr);
8516 auto I = HasDevAddrsMap.find(VD);
8517 if (I != HasDevAddrsMap.end())
8518 for (const auto &MCL : I->second)
8519 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8520 /*IsImplicit=*/true, nullptr,
8521 nullptr);
8522 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8523 "Expect a executable directive");
8524 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8525 bool HasMapBasePtr = false;
8526 bool HasMapArraySec = false;
8527 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8528 const auto *EI = C->getVarRefs().begin();
8529 for (const auto L : C->decl_component_lists(VD)) {
8530 const ValueDecl *VDecl, *Mapper;
8531 // The expression is not correct if the mapping is implicit.
8532 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8533 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8534 std::tie(VDecl, Components, Mapper) = L;
8535 assert(VDecl == VD && "We got information for the wrong declaration??");
8536 assert(!Components.empty() &&
8537 "Not expecting declaration with no component lists.");
8538 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8539 HasMapBasePtr = true;
8540 if (VD && E && VD->getType()->isAnyPointerType() &&
8541 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8542 HasMapArraySec = true;
8543 DeclComponentLists.emplace_back(Components, C->getMapType(),
8544 C->getMapTypeModifiers(),
8545 C->isImplicit(), Mapper, E);
8546 ++EI;
8547 }
8548 }
8549 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8550 const MapData &RHS) {
8551 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8552 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8553 bool HasPresent =
8554 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8555 bool HasAllocs = MapType == OMPC_MAP_alloc;
8556 MapModifiers = std::get<2>(RHS);
8557 MapType = std::get<1>(LHS);
8558 bool HasPresentR =
8559 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8560 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8561 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8562 });
8563
8564 // Find overlapping elements (including the offset from the base element).
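// Illustrative overlap (not from the source): map(tofrom: s) and
// map(to: s.x) overlap, because walking both component lists from the
// member back towards the base reaches the common head 's'.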
8565 llvm::SmallDenseMap<
8566 const MapData *,
8567 llvm::SmallVector<
8568 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8569 4>
8570 OverlappedData;
8571 size_t Count = 0;
8572 for (const MapData &L : DeclComponentLists) {
8573 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8574 OpenMPMapClauseKind MapType;
8575 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8576 bool IsImplicit;
8577 const ValueDecl *Mapper;
8578 const Expr *VarRef;
8579 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8580 L;
8581 ++Count;
8582 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8583 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8584 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8585 VarRef) = L1;
8586 auto CI = Components.rbegin();
8587 auto CE = Components.rend();
8588 auto SI = Components1.rbegin();
8589 auto SE = Components1.rend();
8590 for (; CI != CE && SI != SE; ++CI, ++SI) {
8591 if (CI->getAssociatedExpression()->getStmtClass() !=
8592 SI->getAssociatedExpression()->getStmtClass())
8593 break;
8594 // Are we dealing with different variables/fields?
8595 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8596 break;
8597 }
8598 // We found an overlap if, for at least one of the two lists, we
8599 // reached the head of the components list.
8600 if (CI == CE || SI == SE) {
8601 // Ignore it if it is the same component.
8602 if (CI == CE && SI == SE)
8603 continue;
8604 const auto It = (SI == SE) ? CI : SI;
8605 // If one component is a pointer and another one is a kind of
8606 // dereference of this pointer (array subscript, section, dereference,
8607 // etc.), it is not an overlap.
8608 // The same applies if one component is a base and the other is a
8609 // dereferenced pointer memberexpr with the same base.
8610 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8611 (std::prev(It)->getAssociatedDeclaration() &&
8612 std::prev(It)
8613 ->getAssociatedDeclaration()
8614 ->getType()
8615 ->isPointerType()) ||
8616 (It->getAssociatedDeclaration() &&
8617 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8618 std::next(It) != CE && std::next(It) != SE))
8619 continue;
8620 const MapData &BaseData = CI == CE ? L : L1;
8621 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8622 SI == SE ? Components : Components1;
8623 OverlappedData[&BaseData].push_back(SubData);
8624 }
8625 }
8626 }
8627 // Sort the overlapped elements for each item.
8628 llvm::SmallVector<const FieldDecl *, 4> Layout;
8629 if (!OverlappedData.empty()) {
8630 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8631 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8632 while (BaseType != OrigType) {
8633 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8634 OrigType = BaseType->getPointeeOrArrayElementType();
8635 }
8636
8637 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8638 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8639 else {
8640 const auto *RD = BaseType->getAsRecordDecl();
8641 Layout.append(RD->field_begin(), RD->field_end());
8642 }
8643 }
8644 for (auto &Pair : OverlappedData) {
8645 llvm::stable_sort(
8646 Pair.getSecond(),
8647 [&Layout](
8648 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8649 OMPClauseMappableExprCommon::MappableExprComponentListRef
8650 Second) {
8651 auto CI = First.rbegin();
8652 auto CE = First.rend();
8653 auto SI = Second.rbegin();
8654 auto SE = Second.rend();
8655 for (; CI != CE && SI != SE; ++CI, ++SI) {
8656 if (CI->getAssociatedExpression()->getStmtClass() !=
8657 SI->getAssociatedExpression()->getStmtClass())
8658 break;
8659 // Are we dealing with different variables/fields?
8660 if (CI->getAssociatedDeclaration() !=
8661 SI->getAssociatedDeclaration())
8662 break;
8663 }
8664
8665 // Lists contain the same elements.
8666 if (CI == CE && SI == SE)
8667 return false;
8668
8669 // A list with fewer elements is less than a list with more elements.
8670 if (CI == CE || SI == SE)
8671 return CI == CE;
8672
8673 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8674 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8675 if (FD1->getParent() == FD2->getParent())
8676 return FD1->getFieldIndex() < FD2->getFieldIndex();
8677 const auto *It =
8678 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8679 return FD == FD1 || FD == FD2;
8680 });
8681 return *It == FD1;
8682 });
8683 }
8684
8685 // Associated with a capture, because the mapping flags depend on it.
8686 // Go through all of the elements with the overlapped elements.
8687 bool IsFirstComponentList = true;
8688 MapCombinedInfoTy StructBaseCombinedInfo;
8689 for (const auto &Pair : OverlappedData) {
8690 const MapData &L = *Pair.getFirst();
8691 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8692 OpenMPMapClauseKind MapType;
8693 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8694 bool IsImplicit;
8695 const ValueDecl *Mapper;
8696 const Expr *VarRef;
8697 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8698 L;
8699 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8700 OverlappedComponents = Pair.getSecond();
8701 generateInfoForComponentList(
8702 MapType, MapModifiers, {}, Components, CombinedInfo,
8703 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8704 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8705 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8706 IsFirstComponentList = false;
8707 }
8708 // Go through other elements without overlapped elements.
8709 for (const MapData &L : DeclComponentLists) {
8710 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8711 OpenMPMapClauseKind MapType;
8712 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8713 bool IsImplicit;
8714 const ValueDecl *Mapper;
8715 const Expr *VarRef;
8716 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8717 L;
8718 auto It = OverlappedData.find(&L);
8719 if (It == OverlappedData.end())
8720 generateInfoForComponentList(
8721 MapType, MapModifiers, {}, Components, CombinedInfo,
8722 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8723 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8724 /*ForDeviceAddr=*/false, VD, VarRef,
8725 /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
8726 IsFirstComponentList = false;
8727 }
8728 }
8729
8730 /// Generate the default map information for a given capture \a CI,
8731 /// record field declaration \a RI and captured value \a CV.
8732 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8733 const FieldDecl &RI, llvm::Value *CV,
8734 MapCombinedInfoTy &CombinedInfo) const {
8735 bool IsImplicit = true;
8736 // Do the default mapping.
8737 if (CI.capturesThis()) {
8738 CombinedInfo.Exprs.push_back(nullptr);
8739 CombinedInfo.BasePointers.push_back(CV);
8740 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8741 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8742 CombinedInfo.Pointers.push_back(CV);
8743 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8744 CombinedInfo.Sizes.push_back(
8745 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8746 CGF.Int64Ty, /*isSigned=*/true));
8747 // Default map type.
8748 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8749 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8750 } else if (CI.capturesVariableByCopy()) {
8751 const VarDecl *VD = CI.getCapturedVar();
8752 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8753 CombinedInfo.BasePointers.push_back(CV);
8754 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8755 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8756 CombinedInfo.Pointers.push_back(CV);
8757 if (!RI.getType()->isAnyPointerType()) {
8758 // We have to signal to the runtime those captures that are passed by
8759 // value and are not pointers.
8760 CombinedInfo.Types.push_back(
8761 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8762 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8763 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8764 } else {
8765 // Pointers are implicitly mapped with a zero size and no flags
8766 // (other than the first map, which is added for all implicit maps).
8767 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8768 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8769 }
8770 auto I = FirstPrivateDecls.find(VD);
8771 if (I != FirstPrivateDecls.end())
8772 IsImplicit = I->getSecond();
8773 } else {
8774 assert(CI.capturesVariable() && "Expected captured reference.");
8775 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8776 QualType ElementType = PtrTy->getPointeeType();
8777 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8778 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8779 // The default map type for a scalar/complex type is 'to' because by
8780 // default the value doesn't have to be retrieved. For an aggregate
8781 // type, the default is 'tofrom'.
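// Illustrative defaults (not from the source): a captured aggregate such
// as 'struct T t;' defaults to tofrom, while a captured scalar 'double d;'
// defaults to to.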
8782 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8783 const VarDecl *VD = CI.getCapturedVar();
8784 auto I = FirstPrivateDecls.find(VD);
8785 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8786 CombinedInfo.BasePointers.push_back(CV);
8787 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8788 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8789 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8790 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8791 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8792 AlignmentSource::Decl));
8793 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8794 } else {
8795 CombinedInfo.Pointers.push_back(CV);
8796 }
8797 if (I != FirstPrivateDecls.end())
8798 IsImplicit = I->getSecond();
8799 }
8800 // Every default map produces a single argument which is a target parameter.
8801 CombinedInfo.Types.back() |=
8802 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8803
8804 // Add flag stating this is an implicit map.
8805 if (IsImplicit)
8806 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8807
8808 // No user-defined mapper for default mapping.
8809 CombinedInfo.Mappers.push_back(nullptr);
8810 }
8811};
8812} // anonymous namespace
8813
8814// Try to extract the base declaration from a `this->x` expression if possible.
8815static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
8816 if (!E)
8817 return nullptr;
8818
8819 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8820 if (const MemberExpr *ME =
8821 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8822 return ME->getMemberDecl();
8823 return nullptr;
8824}
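// Usage sketch (illustrative): for map(this->arr[0:n]) the function above
// returns the declaration of 'arr'; for any other expression form it
// returns nullptr.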
8825
8826/// Emit a string constant containing the names of the values mapped to the
8827/// offloading runtime library.
8828static llvm::Constant *
8829emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8830 MappableExprsHandler::MappingExprInfo &MapExprs) {
8831
8832 uint32_t SrcLocStrSize;
8833 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8834 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8835
8836 SourceLocation Loc;
8837 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8838 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8839 Loc = VD->getLocation();
8840 else
8841 Loc = MapExprs.getMapExpr()->getExprLoc();
8842 } else {
8843 Loc = MapExprs.getMapDecl()->getLocation();
8844 }
8845
8846 std::string ExprName;
8847 if (MapExprs.getMapExpr()) {
8848 PrintingPolicy P(CGF.getContext().getLangOpts());
8849 llvm::raw_string_ostream OS(ExprName);
8850 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8851 } else {
8852 ExprName = MapExprs.getMapDecl()->getNameAsString();
8853 }
8854
8855 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8856 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8857 PLoc.getLine(), PLoc.getColumn(),
8858 SrcLocStrSize);
8859}
8860/// Emit the arrays used to pass the captures and map information to the
8861/// offloading runtime library. If there is no map or capture information,
8862/// return nullptr by reference.
8863static void emitOffloadingArraysAndArgs(
8864 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8865 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8866 bool IsNonContiguous = false, bool ForEndCall = false) {
8867 CodeGenModule &CGM = CGF.CGM;
8868
8869 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8870 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8871 CGF.AllocaInsertPt->getIterator());
8872 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8873 CGF.Builder.GetInsertPoint());
8874
8875 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8876 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8877 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8878 }
8879 };
8880
8881 auto CustomMapperCB = [&](unsigned int I) {
8882 llvm::Value *MFunc = nullptr;
8883 if (CombinedInfo.Mappers[I]) {
8884 Info.HasMapper = true;
8885 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8886 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8887 }
8888 return MFunc;
8889 };
8890 OMPBuilder.emitOffloadingArraysAndArgs(
8891 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
8892 ForEndCall, DeviceAddrCB, CustomMapperCB);
8893}
8894
8895/// Check for inner distribute directive.
8896static const OMPExecutableDirective *
8897getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8898 const auto *CS = D.getInnermostCapturedStmt();
8899 const auto *Body =
8900 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8901 const Stmt *ChildStmt =
8902 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8903
8904 if (const auto *NestedDir =
8905 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8906 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8907 switch (D.getDirectiveKind()) {
8908 case OMPD_target:
8909 // For now, treat 'target' with nested 'teams loop' as if it's
8910 // distributed (target teams distribute).
8911 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8912 return NestedDir;
8913 if (DKind == OMPD_teams) {
8914 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8915 /*IgnoreCaptured=*/true);
8916 if (!Body)
8917 return nullptr;
8918 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8919 if (const auto *NND =
8920 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8921 DKind = NND->getDirectiveKind();
8922 if (isOpenMPDistributeDirective(DKind))
8923 return NND;
8924 }
8925 }
8926 return nullptr;
8927 case OMPD_target_teams:
8928 if (isOpenMPDistributeDirective(DKind))
8929 return NestedDir;
8930 return nullptr;
8931 case OMPD_target_parallel:
8932 case OMPD_target_simd:
8933 case OMPD_target_parallel_for:
8934 case OMPD_target_parallel_for_simd:
8935 return nullptr;
8936 case OMPD_target_teams_distribute:
8937 case OMPD_target_teams_distribute_simd:
8938 case OMPD_target_teams_distribute_parallel_for:
8939 case OMPD_target_teams_distribute_parallel_for_simd:
8940 case OMPD_parallel:
8941 case OMPD_for:
8942 case OMPD_parallel_for:
8943 case OMPD_parallel_master:
8944 case OMPD_parallel_sections:
8945 case OMPD_for_simd:
8946 case OMPD_parallel_for_simd:
8947 case OMPD_cancel:
8948 case OMPD_cancellation_point:
8949 case OMPD_ordered:
8950 case OMPD_threadprivate:
8951 case OMPD_allocate:
8952 case OMPD_task:
8953 case OMPD_simd:
8954 case OMPD_tile:
8955 case OMPD_unroll:
8956 case OMPD_sections:
8957 case OMPD_section:
8958 case OMPD_single:
8959 case OMPD_master:
8960 case OMPD_critical:
8961 case OMPD_taskyield:
8962 case OMPD_barrier:
8963 case OMPD_taskwait:
8964 case OMPD_taskgroup:
8965 case OMPD_atomic:
8966 case OMPD_flush:
8967 case OMPD_depobj:
8968 case OMPD_scan:
8969 case OMPD_teams:
8970 case OMPD_target_data:
8971 case OMPD_target_exit_data:
8972 case OMPD_target_enter_data:
8973 case OMPD_distribute:
8974 case OMPD_distribute_simd:
8975 case OMPD_distribute_parallel_for:
8976 case OMPD_distribute_parallel_for_simd:
8977 case OMPD_teams_distribute:
8978 case OMPD_teams_distribute_simd:
8979 case OMPD_teams_distribute_parallel_for:
8980 case OMPD_teams_distribute_parallel_for_simd:
8981 case OMPD_target_update:
8982 case OMPD_declare_simd:
8983 case OMPD_declare_variant:
8984 case OMPD_begin_declare_variant:
8985 case OMPD_end_declare_variant:
8986 case OMPD_declare_target:
8987 case OMPD_end_declare_target:
8988 case OMPD_declare_reduction:
8989 case OMPD_declare_mapper:
8990 case OMPD_taskloop:
8991 case OMPD_taskloop_simd:
8992 case OMPD_master_taskloop:
8993 case OMPD_master_taskloop_simd:
8994 case OMPD_parallel_master_taskloop:
8995 case OMPD_parallel_master_taskloop_simd:
8996 case OMPD_requires:
8997 case OMPD_metadirective:
8998 case OMPD_unknown:
8999 default:
9000 llvm_unreachable("Unexpected directive.");
9001 }
9002 }
9003
9004 return nullptr;
9005}
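// Illustrative sketch (editorial, not from the upstream source): for
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int I = 0; I < N; ++I) ...
//
// the scan above drills through the nested 'teams' region and returns the
// 'distribute parallel for' directive; if no distribute-like child exists,
// it returns nullptr and the caller treats the region as non-distributed.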
9006
9007/// Emit the user-defined mapper function. The code generation follows the
9008/// pattern in the example below.
9009/// \code
9010/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9011/// void *base, void *begin,
9012/// int64_t size, int64_t type,
9013/// void *name = nullptr) {
9014/// // Allocate space for an array section first or add a base/begin for
9015/// // pointer dereference.
9016/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9017/// !maptype.IsDelete)
9018/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9019/// size*sizeof(Ty), clearToFromMember(type));
9020/// // Map members.
9021/// for (unsigned i = 0; i < size; i++) {
9022/// // For each component specified by this mapper:
9023/// for (auto c : begin[i]->all_components) {
9024/// if (c.hasMapper())
9025/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9026/// c.arg_type, c.arg_name);
9027/// else
9028/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9029/// c.arg_begin, c.arg_size, c.arg_type,
9030/// c.arg_name);
9031/// }
9032/// }
9033/// // Delete the array section.
9034/// if (size > 1 && maptype.IsDelete)
9035/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9036/// size*sizeof(Ty), clearToFromMember(type));
9037/// }
9038/// \endcode
9039void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9040 CodeGenFunction *CGF) {
9041 if (UDMMap.count(D) > 0)
9042 return;
9043 ASTContext &C = CGM.getContext();
9044 QualType Ty = D->getType();
9045 auto *MapperVarDecl =
9046 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9047 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9048 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9049
9050 CodeGenFunction MapperCGF(CGM);
9051 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9052 auto PrivatizeAndGenMapInfoCB =
9053 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9054 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9055 MapperCGF.Builder.restoreIP(CodeGenIP);
9056
9057 // Privatize the declared variable of mapper to be the current array
9058 // element.
9059 Address PtrCurrent(
9060 PtrPHI, ElemTy,
9061 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9062 .getAlignment()
9063 .alignmentOfArrayElement(ElementSize));
9064 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9065 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9066 (void)Scope.Privatize();
9067
9068 // Get map clause information.
9069 MappableExprsHandler MEHandler(*D, MapperCGF);
9070 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9071
9072 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9073 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9074 };
9075 if (CGM.getCodeGenOpts().getDebugInfo() !=
9076 llvm::codegenoptions::NoDebugInfo) {
9077 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9078 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9079 FillInfoMap);
9080 }
9081
9082 return CombinedInfo;
9083 };
9084
9085 auto CustomMapperCB = [&](unsigned I, llvm::Function **MapperFunc) {
9086 if (CombinedInfo.Mappers[I]) {
9087 // Call the corresponding mapper function.
9088 *MapperFunc = getOrCreateUserDefinedMapperFunc(
9089 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9090 assert(*MapperFunc && "Expect a valid mapper function is available.");
9091 return true;
9092 }
9093 return false;
9094 };
9095
9096 SmallString<64> TyStr;
9097 llvm::raw_svector_ostream Out(TyStr);
9098 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9099 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9100
9101 auto *NewFn = OMPBuilder.emitUserDefinedMapper(PrivatizeAndGenMapInfoCB,
9102 ElemTy, Name, CustomMapperCB);
9103 UDMMap.try_emplace(D, NewFn);
9104 if (CGF)
9105 FunctionUDMMap[CGF->CurFn].push_back(D);
9106}
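// Illustrative sketch (editorial, hypothetical declaration): a mapper such as
//
//   struct Vec { int Len; double *Data; };
//   #pragma omp declare mapper(id : struct Vec V) map(V, V.Data[0:V.Len])
//
// funnels through the function above exactly once; the generated routine is
// cached in UDMMap under a name of the form '.omp_mapper.<type>.id' per the
// doc comment above.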
9107
9108llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9109 const OMPDeclareMapperDecl *D) {
9110 auto I = UDMMap.find(D);
9111 if (I != UDMMap.end())
9112 return I->second;
9113 emitUserDefinedMapper(D);
9114 return UDMMap.lookup(D);
9115}
9116
9117llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9118 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9119 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9120 const OMPLoopDirective &D)>
9121 SizeEmitter) {
9122 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9123 const OMPExecutableDirective *TD = &D;
9124 // Get nested teams distribute kind directive, if any. For now, treat
9125 // 'target_teams_loop' as if it's really a target_teams_distribute.
9126 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9127 Kind != OMPD_target_teams_loop)
9128 TD = getNestedDistributeDirective(CGM.getContext(), D);
9129 if (!TD)
9130 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9131
9132 const auto *LD = cast<OMPLoopDirective>(TD);
9133 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9134 return NumIterations;
9135 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9136}
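// Illustrative sketch (editorial): given
//
//   #pragma omp target teams distribute parallel for
//   for (int I = 0; I < N; ++I) ...
//
// SizeEmitter yields the loop trip count (N here), which the runtime may use
// to size the launch; when no suitable nested loop directive is found, the
// constant 0 is passed to mean "trip count unknown".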
9137
9138static void
9139emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9140 const OMPExecutableDirective &D,
9141 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9142 bool RequiresOuterTask, const CapturedStmt &CS,
9143 bool OffloadingMandatory, CodeGenFunction &CGF) {
9144 if (OffloadingMandatory) {
9145 CGF.Builder.CreateUnreachable();
9146 } else {
9147 if (RequiresOuterTask) {
9148 CapturedVars.clear();
9149 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9150 }
9151 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9152 CapturedVars);
9153 }
9154}
9155
9156static llvm::Value *emitDeviceID(
9157 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9158 CodeGenFunction &CGF) {
9159 // Emit device ID if any.
9160 llvm::Value *DeviceID;
9161 if (Device.getPointer()) {
9162 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9163 Device.getInt() == OMPC_DEVICE_device_num) &&
9164 "Expected device_num modifier.");
9165 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9166 DeviceID =
9167 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9168 } else {
9169 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9170 }
9171 return DeviceID;
9172}
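// Illustrative sketch (editorial): a 'device(2)' clause is emitted as the
// scalar 2 sign-extended to i64, while the no-clause path passes the
// OMP_DEVICEID_UNDEF sentinel so the runtime substitutes its default device.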
9173
9174static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9175 CodeGenFunction &CGF) {
9176 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9177
9178 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9179 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9180 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9181 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9182 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9183 /*isSigned=*/false);
9184 }
9185 return DynCGroupMem;
9186}
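// Illustrative sketch (editorial): the LLVM extension clause
// 'ompx_dyn_cgroup_mem(1024)' makes the helper above return the i32 value
// 1024, which is forwarded as the dynamic per-team memory request of the
// kernel launch; without the clause the request defaults to 0.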
9187static void genMapInfoForCaptures(
9188 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9189 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9190 llvm::OpenMPIRBuilder &OMPBuilder,
9191 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9192 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9193
9194 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9195 auto RI = CS.getCapturedRecordDecl()->field_begin();
9196 auto *CV = CapturedVars.begin();
9197 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9198 CE = CS.capture_end();
9199 CI != CE; ++CI, ++RI, ++CV) {
9200 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9201 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9202
9203 // VLA sizes are passed to the outlined region by copy and do not have map
9204 // information associated.
9205 if (CI->capturesVariableArrayType()) {
9206 CurInfo.Exprs.push_back(nullptr);
9207 CurInfo.BasePointers.push_back(*CV);
9208 CurInfo.DevicePtrDecls.push_back(nullptr);
9209 CurInfo.DevicePointers.push_back(
9210 MappableExprsHandler::DeviceInfoTy::None);
9211 CurInfo.Pointers.push_back(*CV);
9212 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9213 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9214 // Copy to the device as an argument. No need to retrieve it.
9215 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9216 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9217 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9218 CurInfo.Mappers.push_back(nullptr);
9219 } else {
9220 // If we have any information in the map clause, we use it, otherwise we
9221 // just do a default mapping.
9222 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9223 if (!CI->capturesThis())
9224 MappedVarSet.insert(CI->getCapturedVar());
9225 else
9226 MappedVarSet.insert(nullptr);
9227 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9228 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9229 // Generate correct mapping for variables captured by reference in
9230 // lambdas.
9231 if (CI->capturesVariable())
9232 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9233 CurInfo, LambdaPointers);
9234 }
9235 // We expect to have at least an element of information for this capture.
9236 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9237 "Non-existing map pointer for capture!");
9238 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9239 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9240 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9241 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9242 "Inconsistent map information sizes!");
9243
9244 // If there is an entry in PartialStruct it means we have a struct with
9245 // individual members mapped. Emit an extra combined entry.
9246 if (PartialStruct.Base.isValid()) {
9247 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9248 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
9249 CI->capturesThis(), OMPBuilder, nullptr,
9250 /*NotTargetParams*/ false);
9251 }
9252
9253 // We need to append the results of this capture to what we already have.
9254 CombinedInfo.append(CurInfo);
9255 }
9256 // Adjust MEMBER_OF flags for the lambdas captures.
9257 MEHandler.adjustMemberOfForLambdaCaptures(
9258 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9259 CombinedInfo.Pointers, CombinedInfo.Types);
9260}
9261static void
9262genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9263 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9264 llvm::OpenMPIRBuilder &OMPBuilder,
9265 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9266 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9267
9268 CodeGenModule &CGM = CGF.CGM;
9269 // Map any list items in a map clause that were not captures because they
9270 // weren't referenced within the construct.
9271 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9272
9273 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9274 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9275 };
9276 if (CGM.getCodeGenOpts().getDebugInfo() !=
9277 llvm::codegenoptions::NoDebugInfo) {
9278 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9279 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9280 FillInfoMap);
9281 }
9282}
9283
9284static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9285 const CapturedStmt &CS,
9286 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9287 llvm::OpenMPIRBuilder &OMPBuilder,
9288 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9289 // Get mappable expression information.
9290 MappableExprsHandler MEHandler(D, CGF);
9291 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9292
9293 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9294 MappedVarSet, CombinedInfo);
9295 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9296}
9297
9298template <typename ClauseTy>
9299static void
9300emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9301 const OMPExecutableDirective &D,
9302 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9303 const auto *C = D.getSingleClause<ClauseTy>();
9304 assert(!C->varlist_empty() &&
9305 "ompx_bare requires explicit num_teams and thread_limit");
9306 CodeGenFunction::RunCleanupsScope Scope(CGF);
9307 for (auto *E : C->varlist()) {
9308 llvm::Value *V = CGF.EmitScalarExpr(E);
9309 Values.push_back(
9310 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9311 }
9312}
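// Illustrative sketch (editorial, hypothetical directive): with the LLVM
// extension clause 'ompx_bare', a construct like
//
//   #pragma omp target teams ompx_bare num_teams(GX, GY, GZ) thread_limit(BX, BY, BZ)
//
// collects every listed value per clause, allowing a multi-dimensional launch
// grid instead of the single value computed on the non-bare path.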
9313
9314static void emitTargetCallKernelLaunch(
9315 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9316 const OMPExecutableDirective &D,
9317 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9318 const CapturedStmt &CS, bool OffloadingMandatory,
9319 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9320 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9321 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9322 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9323 const OMPLoopDirective &D)>
9324 SizeEmitter,
9325 CodeGenFunction &CGF, CodeGenModule &CGM) {
9326 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9327
9328 // Fill up the arrays with all the captured variables.
9329 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9330 CGOpenMPRuntime::TargetDataInfo Info;
9331 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9332
9333 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9334 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9335
9336 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9337 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9338 CGF.VoidPtrTy, CGM.getPointerAlign());
9339 InputInfo.PointersArray =
9340 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9341 InputInfo.SizesArray =
9342 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9343 InputInfo.MappersArray =
9344 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9345 MapTypesArray = Info.RTArgs.MapTypesArray;
9346 MapNamesArray = Info.RTArgs.MapNamesArray;
9347
9348 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9349 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9350 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9351 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9352 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9353
9354 if (IsReverseOffloading) {
9355 // Reverse offloading is not supported, so just execute on the host.
9356 // FIXME: This fallback solution is incorrect since it ignores the
9357 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9358 // assert here and ensure SEMA emits an error.
9359 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9360 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9361 return;
9362 }
9363
9364 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9365 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9366
9367 llvm::Value *BasePointersArray =
9368 InputInfo.BasePointersArray.emitRawPointer(CGF);
9369 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9370 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9371 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9372
9373 auto &&EmitTargetCallFallbackCB =
9374 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9375 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9376 -> llvm::OpenMPIRBuilder::InsertPointTy {
9377 CGF.Builder.restoreIP(IP);
9378 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9379 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9380 return CGF.Builder.saveIP();
9381 };
9382
9383 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9384 SmallVector<llvm::Value *, 3> NumTeams;
9385 SmallVector<llvm::Value *, 3> NumThreads;
9386 if (IsBare) {
9387 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9388 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9389 NumThreads);
9390 } else {
9391 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9392 NumThreads.push_back(
9393 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9394 }
9395
9396 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9397 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9398 llvm::Value *NumIterations =
9399 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9400 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9401 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9402 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9403
9404 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9405 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9406 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9407
9408 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9409 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9410 DynCGGroupMem, HasNoWait);
9411
9412 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
9413 OMPRuntime->getOMPBuilder().emitKernelLaunch(
9414 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9415 RTLoc, AllocaIP);
9416 assert(AfterIP && "unexpected error creating kernel launch");
9417 CGF.Builder.restoreIP(*AfterIP);
9418 };
9419
9420 if (RequiresOuterTask)
9421 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9422 else
9423 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9424}
9425
9426static void
9427emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9428 const OMPExecutableDirective &D,
9429 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9430 bool RequiresOuterTask, const CapturedStmt &CS,
9431 bool OffloadingMandatory, CodeGenFunction &CGF) {
9432
9433 // Notify that the host version must be executed.
9434 auto &&ElseGen =
9435 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9436 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9437 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9438 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9439 };
9440
9441 if (RequiresOuterTask) {
9442 CodeGenFunction::OMPTargetDataInfo InputInfo;
9443 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9444 } else {
9445 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9446 }
9447}
9448
9449void CGOpenMPRuntime::emitTargetCall(
9450 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9451 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9452 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9453 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9454 const OMPLoopDirective &D)>
9455 SizeEmitter) {
9456 if (!CGF.HaveInsertPoint())
9457 return;
9458
9459 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9460 CGM.getLangOpts().OpenMPOffloadMandatory;
9461
9462 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9463
9464 const bool RequiresOuterTask =
9465 D.hasClausesOfKind<OMPDependClause>() ||
9466 D.hasClausesOfKind<OMPNowaitClause>() ||
9467 D.hasClausesOfKind<OMPInReductionClause>() ||
9468 (CGM.getLangOpts().OpenMP >= 51 &&
9469 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9470 D.hasClausesOfKind<OMPThreadLimitClause>());
9471 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9472 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9473 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9474 PrePostActionTy &) {
9475 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9476 };
9477 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9478
9479 CodeGenFunction::OMPTargetDataInfo InputInfo;
9480 llvm::Value *MapTypesArray = nullptr;
9481 llvm::Value *MapNamesArray = nullptr;
9482
9483 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9484 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9485 OutlinedFnID, &InputInfo, &MapTypesArray,
9486 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9487 PrePostActionTy &) {
9488 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9489 RequiresOuterTask, CS, OffloadingMandatory,
9490 Device, OutlinedFnID, InputInfo, MapTypesArray,
9491 MapNamesArray, SizeEmitter, CGF, CGM);
9492 };
9493
9494 auto &&TargetElseGen =
9495 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9496 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9497 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9498 CS, OffloadingMandatory, CGF);
9499 };
9500
9501 // If we have a target function ID it means that we need to support
9502 // offloading, otherwise, just execute on the host. We need to execute on host
9503 // regardless of the conditional in the if clause if, e.g., the user does not
9504 // specify target triples.
9505 if (OutlinedFnID) {
9506 if (IfCond) {
9507 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9508 } else {
9509 RegionCodeGenTy ThenRCG(TargetThenGen);
9510 ThenRCG(CGF);
9511 }
9512 } else {
9513 RegionCodeGenTy ElseRCG(TargetElseGen);
9514 ElseRCG(CGF);
9515 }
9516}
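// Illustrative sketch (editorial; roughly what the layering above produces
// for '#pragma omp target if(C) device(D)', with '__tgt_target_kernel'
// standing for the runtime entry reached via emitKernelLaunch):
//
//   if (C) {
//     if (__tgt_target_kernel(..., D, ...) != 0) host_fallback();
//   } else {
//     host_fallback();
//   }
//
// When no OutlinedFnID exists (e.g. no offload triples were requested), only
// the host fallback is emitted.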
9517
9518void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9519 StringRef ParentName) {
9520 if (!S)
9521 return;
9522
9523 // Codegen OMP target directives that offload compute to the device.
9524 bool RequiresDeviceCodegen =
9525 isa<OMPExecutableDirective>(S) &&
9526 isOpenMPTargetExecutionDirective(
9527 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9528
9529 if (RequiresDeviceCodegen) {
9530 const auto &E = *cast<OMPExecutableDirective>(S);
9531
9532 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9533 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9534
9535 // Is this a target region that should not be emitted as an entry point? If
9536 // so just signal we are done with this target region.
9537 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9538 return;
9539
9540 switch (E.getDirectiveKind()) {
9541 case OMPD_target:
9542 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9543 cast<OMPTargetDirective>(E));
9544 break;
9545 case OMPD_target_parallel:
9546 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9547 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9548 break;
9549 case OMPD_target_teams:
9550 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9551 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9552 break;
9553 case OMPD_target_teams_distribute:
9554 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9555 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9556 break;
9557 case OMPD_target_teams_distribute_simd:
9558 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9559 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9560 break;
9561 case OMPD_target_parallel_for:
9562 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9563 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9564 break;
9565 case OMPD_target_parallel_for_simd:
9566 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9567 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9568 break;
9569 case OMPD_target_simd:
9570 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9571 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9572 break;
9573 case OMPD_target_teams_distribute_parallel_for:
9574 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9575 CGM, ParentName,
9576 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9577 break;
9578 case OMPD_target_teams_distribute_parallel_for_simd:
9579 CodeGenFunction::
9580 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9581 CGM, ParentName,
9582 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9583 break;
9584 case OMPD_target_teams_loop:
9585 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9586 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9587 break;
9588 case OMPD_target_parallel_loop:
9589 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9590 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9591 break;
9592 case OMPD_parallel:
9593 case OMPD_for:
9594 case OMPD_parallel_for:
9595 case OMPD_parallel_master:
9596 case OMPD_parallel_sections:
9597 case OMPD_for_simd:
9598 case OMPD_parallel_for_simd:
9599 case OMPD_cancel:
9600 case OMPD_cancellation_point:
9601 case OMPD_ordered:
9602 case OMPD_threadprivate:
9603 case OMPD_allocate:
9604 case OMPD_task:
9605 case OMPD_simd:
9606 case OMPD_tile:
9607 case OMPD_unroll:
9608 case OMPD_sections:
9609 case OMPD_section:
9610 case OMPD_single:
9611 case OMPD_master:
9612 case OMPD_critical:
9613 case OMPD_taskyield:
9614 case OMPD_barrier:
9615 case OMPD_taskwait:
9616 case OMPD_taskgroup:
9617 case OMPD_atomic:
9618 case OMPD_flush:
9619 case OMPD_depobj:
9620 case OMPD_scan:
9621 case OMPD_teams:
9622 case OMPD_target_data:
9623 case OMPD_target_exit_data:
9624 case OMPD_target_enter_data:
9625 case OMPD_distribute:
9626 case OMPD_distribute_simd:
9627 case OMPD_distribute_parallel_for:
9628 case OMPD_distribute_parallel_for_simd:
9629 case OMPD_teams_distribute:
9630 case OMPD_teams_distribute_simd:
9631 case OMPD_teams_distribute_parallel_for:
9632 case OMPD_teams_distribute_parallel_for_simd:
9633 case OMPD_target_update:
9634 case OMPD_declare_simd:
9635 case OMPD_declare_variant:
9636 case OMPD_begin_declare_variant:
9637 case OMPD_end_declare_variant:
9638 case OMPD_declare_target:
9639 case OMPD_end_declare_target:
9640 case OMPD_declare_reduction:
9641 case OMPD_declare_mapper:
9642 case OMPD_taskloop:
9643 case OMPD_taskloop_simd:
9644 case OMPD_master_taskloop:
9645 case OMPD_master_taskloop_simd:
9646 case OMPD_parallel_master_taskloop:
9647 case OMPD_parallel_master_taskloop_simd:
9648 case OMPD_requires:
9649 case OMPD_metadirective:
9650 case OMPD_unknown:
9651 default:
9652 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9653 }
9654 return;
9655 }
9656
9657 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9658 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9659 return;
9660
9661 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9662 return;
9663 }
9664
9665 // If this is a lambda function, look into its body.
9666 if (const auto *L = dyn_cast<LambdaExpr>(S))
9667 S = L->getBody();
9668
9669 // Keep looking for target regions recursively.
9670 for (const Stmt *II : S->children())
9671 scanForTargetRegionsFunctions(II, ParentName);
9672}
9673
9674static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9675 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9676 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9677 if (!DevTy)
9678 return false;
9679 // Do not emit device_type(nohost) functions for the host.
9680 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9681 return true;
9682 // Do not emit device_type(host) functions for the device.
9683 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9684 return true;
9685 return false;
9686}
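// Illustrative sketch (editorial): under
//
//   #pragma omp declare target device_type(nohost)
//   void gpu_only();
//   #pragma omp end declare target
//
// the predicate above returns true on the host pass, so 'gpu_only' is never
// emitted there; device_type(host) suppresses the device pass symmetrically.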
9687
9688bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9689 // If emitting code for the host, we do not process FD here. Instead we do
9690 // the normal code generation.
9691 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9692 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9693 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9694 CGM.getLangOpts().OpenMPIsTargetDevice))
9695 return true;
9696 return false;
9697 }
9698
9699 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9700 // Try to detect target regions in the function.
9701 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9702 StringRef Name = CGM.getMangledName(GD);
9703 scanForTargetRegionsFunctions(FD->getBody(), Name);
9704 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9705 CGM.getLangOpts().OpenMPIsTargetDevice))
9706 return true;
9707 }
9708
9709 // Do not emit the function if it is not marked as declare target.
9710 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9711 AlreadyEmittedTargetDecls.count(VD) == 0;
9712}
9713
9714bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9715 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9716 CGM.getLangOpts().OpenMPIsTargetDevice))
9717 return true;
9718
9719 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9720 return false;
9721
9722 // Check if there are Ctors/Dtors in this declaration and look for target
9723 // regions in it. We use the complete variant to produce the kernel name
9724 // mangling.
9725 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9726 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9727 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9728 StringRef ParentName =
9729 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9730 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9731 }
9732 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9733 StringRef ParentName =
9734 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9735 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9736 }
9737 }
9738
9739 // Do not emit the variable if it is not marked as declare target.
9740 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9741 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9742 cast<VarDecl>(GD.getDecl()));
9743 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9744 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9745 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9746 hasRequiresUnifiedSharedMemory())) {
9747 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9748 return true;
9749 }
9750 return false;
9751}
9752
9753void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9754 llvm::Constant *Addr) {
9755 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9756 !CGM.getLangOpts().OpenMPIsTargetDevice)
9757 return;
9758
9759 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9760 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9761
9762 // If this is an 'extern' declaration we defer to the canonical definition and
9763 // do not emit an offloading entry.
9764 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9765 VD->hasExternalStorage())
9766 return;
9767
9768 if (!Res) {
9769 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9770 // Register non-target variables being emitted in device code (debug info
9771 // may cause this).
9772 StringRef VarName = CGM.getMangledName(VD);
9773 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9774 }
9775 return;
9776 }
9777
9778 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9779 auto LinkageForVariable = [&VD, this]() {
9780 return CGM.getLLVMLinkageVarDefinition(VD);
9781 };
9782
9783 std::vector<llvm::GlobalVariable *> GeneratedRefs;
9784 OMPBuilder.registerTargetGlobalVariable(
9785 convertCaptureClause(VD), convertDeviceClause(VD),
9786 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9787 VD->isExternallyVisible(),
9788 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9789 VD->getCanonicalDecl()->getBeginLoc()),
9790 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9791 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9792 CGM.getTypes().ConvertTypeForMem(
9793 CGM.getContext().getPointerType(VD->getType())),
9794 Addr);
9795
9796 for (auto *ref : GeneratedRefs)
9797 CGM.addCompilerUsedGlobal(ref);
9798}
9799
9800bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9801 if (isa<FunctionDecl>(GD.getDecl()) ||
9802 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9803 return emitTargetFunctions(GD);
9804
9805 return emitTargetGlobalVariable(GD);
9806}
9807
9808void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9809 for (const VarDecl *VD : DeferredGlobalVariables) {
9810 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9811 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9812 if (!Res)
9813 continue;
9814 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9815 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9816 !hasRequiresUnifiedSharedMemory()) {
9817 CGM.EmitGlobal(VD);
9818 } else {
9819 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9820 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9821 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9823 "Expected link clause or to clause with unified memory.");
9824 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9825 }
9826 }
9827}
9828
9829void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9830 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9831 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9832 " Expected target-based directive.");
9833}
9834
9835void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9836 for (const OMPClause *Clause : D->clauselists()) {
9837 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9838 HasRequiresUnifiedSharedMemory = true;
9839 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
9840 } else if (const auto *AC =
9841 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9842 switch (AC->getAtomicDefaultMemOrderKind()) {
9843 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9844 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9845 break;
9846 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9847 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9848 break;
9849 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9850 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9851 break;
9852 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9853 break;
9854 }
9855 }
9856 }
9857}
9858
9859llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
9860 return RequiresAtomicOrdering;
9861}
9862
9863bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9864 LangAS &AS) {
9865 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9866 return false;
9867 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9868 switch(A->getAllocatorType()) {
9869 case OMPAllocateDeclAttr::OMPNullMemAlloc:
9870 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9871 // Not supported, fallback to the default mem space.
9872 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9873 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9874 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9875 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9876 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9877 case OMPAllocateDeclAttr::OMPConstMemAlloc:
9878 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9879 AS = LangAS::Default;
9880 return true;
9881 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9882 llvm_unreachable("Expected predefined allocator for the variables with the "
9883 "static storage.");
9884 }
9885 return false;
9886}
9887
9888bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9889 return HasRequiresUnifiedSharedMemory;
9890}
9891
9892CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9893 CodeGenModule &CGM)
9894 : CGM(CGM) {
9895 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9896 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9897 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9898 }
9899}
9900
9901CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9902 if (CGM.getLangOpts().OpenMPIsTargetDevice)
9903 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9904}
9905
9906bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9907 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
9908 return true;
9909
9910 const auto *D = cast<FunctionDecl>(GD.getDecl());
9911 // Do not emit the function if it is marked as declare target, as it was
9912 // already emitted.
9913 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9914 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9915 if (auto *F = dyn_cast_or_null<llvm::Function>(
9916 CGM.GetGlobalValue(CGM.getMangledName(GD))))
9917 return !F->isDeclaration();
9918 return false;
9919 }
9920 return true;
9921 }
9922
9923 return !AlreadyEmittedTargetDecls.insert(D).second;
9924}
9925
9926void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9927 const OMPExecutableDirective &D,
9928 SourceLocation Loc,
9929 llvm::Function *OutlinedFn,
9930 ArrayRef<llvm::Value *> CapturedVars) {
9931 if (!CGF.HaveInsertPoint())
9932 return;
9933
9934 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9935 CodeGenFunction::RunCleanupsScope Scope(CGF);
9936
9937 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9938 llvm::Value *Args[] = {
9939 RTLoc,
9940 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9941 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9942 llvm::SmallVector<llvm::Value *, 16> RealArgs;
9943 RealArgs.append(std::begin(Args), std::end(Args));
9944 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9945
9946 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
9947 CGM.getModule(), OMPRTL___kmpc_fork_teams);
9948 CGF.EmitRuntimeCall(RTLFn, RealArgs);
9949}
9950
9951void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9952 const Expr *NumTeams,
9953 const Expr *ThreadLimit,
9954 SourceLocation Loc) {
9955 if (!CGF.HaveInsertPoint())
9956 return;
9957
9958 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9959
9960 llvm::Value *NumTeamsVal =
9961 NumTeams
9962 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9963 CGF.CGM.Int32Ty, /* isSigned = */ true)
9964 : CGF.Builder.getInt32(0);
9965
9966 llvm::Value *ThreadLimitVal =
9967 ThreadLimit
9968 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9969 CGF.CGM.Int32Ty, /* isSigned = */ true)
9970 : CGF.Builder.getInt32(0);
9971
9972 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9973 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9974 ThreadLimitVal};
9975 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
9976 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
9977 PushNumTeamsArgs);
9978}
9979
9980void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
9981 const Expr *ThreadLimit,
9982 SourceLocation Loc) {
9983 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9984 llvm::Value *ThreadLimitVal =
9985 ThreadLimit
9986 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9987 CGF.CGM.Int32Ty, /* isSigned = */ true)
9988 : CGF.Builder.getInt32(0);
9989
9990 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
9991 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
9992 ThreadLimitVal};
9993 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
9994 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
9995 ThreadLimitArgs);
9996}
9997
9998void CGOpenMPRuntime::emitTargetDataCalls(
9999 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10000 const Expr *Device, const RegionCodeGenTy &CodeGen,
10001 CGOpenMPRuntime::TargetDataInfo &Info) {
10002 if (!CGF.HaveInsertPoint())
10003 return;
10004
10005 // Action used to replace the default codegen action and turn privatization
10006 // off.
10007 PrePostActionTy NoPrivAction;
10008
10009 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10010
10011 llvm::Value *IfCondVal = nullptr;
10012 if (IfCond)
10013 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10014
10015 // Emit device ID if any.
10016 llvm::Value *DeviceID = nullptr;
10017 if (Device) {
10018 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10019 CGF.Int64Ty, /*isSigned=*/true);
10020 } else {
10021 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10022 }
10023
10024 // Fill up the arrays with all the mapped variables.
10025 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10026 auto GenMapInfoCB =
10027 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10028 CGF.Builder.restoreIP(CodeGenIP);
10029 // Get map clause information.
10030 MappableExprsHandler MEHandler(D, CGF);
10031 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10032
10033 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10034 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10035 };
10036 if (CGM.getCodeGenOpts().getDebugInfo() !=
10037 llvm::codegenoptions::NoDebugInfo) {
10038 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10039 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10040 FillInfoMap);
10041 }
10042
10043 return CombinedInfo;
10044 };
10045 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10046 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10047 CGF.Builder.restoreIP(CodeGenIP);
10048 switch (BodyGenType) {
10049 case BodyGenTy::Priv:
10050 if (!Info.CaptureDeviceAddrMap.empty())
10051 CodeGen(CGF);
10052 break;
10053 case BodyGenTy::DupNoPriv:
10054 if (!Info.CaptureDeviceAddrMap.empty()) {
10055 CodeGen.setAction(NoPrivAction);
10056 CodeGen(CGF);
10057 }
10058 break;
10059 case BodyGenTy::NoPriv:
10060 if (Info.CaptureDeviceAddrMap.empty()) {
10061 CodeGen.setAction(NoPrivAction);
10062 CodeGen(CGF);
10063 }
10064 break;
10065 }
10066 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10067 CGF.Builder.GetInsertPoint());
10068 };
10069
10070 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10071 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10072 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10073 }
10074 };
10075
10076 auto CustomMapperCB = [&](unsigned int I) {
10077 llvm::Value *MFunc = nullptr;
10078 if (CombinedInfo.Mappers[I]) {
10079 Info.HasMapper = true;
10080 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10081 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10082 }
10083 return MFunc;
10084 };
10085
10086 // Source location for the ident struct
10087 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10088
10089 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10090 CGF.AllocaInsertPt->getIterator());
10091 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10092 CGF.Builder.GetInsertPoint());
10093 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10094 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
10095 OMPBuilder.createTargetData(
10096 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10097 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc);
10098 assert(AfterIP && "unexpected error creating target data");
10099 CGF.Builder.restoreIP(*AfterIP);
10100}
10101
10102void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10103 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10104 const Expr *Device) {
10105 if (!CGF.HaveInsertPoint())
10106 return;
10107
10108 assert((isa<OMPTargetEnterDataDirective>(D) ||
10109 isa<OMPTargetExitDataDirective>(D) ||
10110 isa<OMPTargetUpdateDirective>(D)) &&
10111 "Expecting either target enter, exit data, or update directives.");
10112
10113 CodeGenFunction::OMPTargetDataInfo InputInfo;
10114 llvm::Value *MapTypesArray = nullptr;
10115 llvm::Value *MapNamesArray = nullptr;
10116 // Generate the code for the opening of the data environment.
10117 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10118 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10119 // Emit device ID if any.
10120 llvm::Value *DeviceID = nullptr;
10121 if (Device) {
10122 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10123 CGF.Int64Ty, /*isSigned=*/true);
10124 } else {
10125 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10126 }
10127
10128 // Emit the number of elements in the offloading arrays.
10129 llvm::Constant *PointerNum =
10130 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10131
10132 // Source location for the ident struct
10133 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10134
10135 SmallVector<llvm::Value *, 13> OffloadingArgs(
10136 {RTLoc, DeviceID, PointerNum,
10137 InputInfo.BasePointersArray.emitRawPointer(CGF),
10138 InputInfo.PointersArray.emitRawPointer(CGF),
10139 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10140 InputInfo.MappersArray.emitRawPointer(CGF)});
10141
10142 // Select the right runtime function call for each standalone
10143 // directive.
10144 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10145 RuntimeFunction RTLFn;
10146 switch (D.getDirectiveKind()) {
10147 case OMPD_target_enter_data:
10148 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10149 : OMPRTL___tgt_target_data_begin_mapper;
10150 break;
10151 case OMPD_target_exit_data:
10152 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10153 : OMPRTL___tgt_target_data_end_mapper;
10154 break;
10155 case OMPD_target_update:
10156 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10157 : OMPRTL___tgt_target_data_update_mapper;
10158 break;
10159 case OMPD_parallel:
10160 case OMPD_for:
10161 case OMPD_parallel_for:
10162 case OMPD_parallel_master:
10163 case OMPD_parallel_sections:
10164 case OMPD_for_simd:
10165 case OMPD_parallel_for_simd:
10166 case OMPD_cancel:
10167 case OMPD_cancellation_point:
10168 case OMPD_ordered:
10169 case OMPD_threadprivate:
10170 case OMPD_allocate:
10171 case OMPD_task:
10172 case OMPD_simd:
10173 case OMPD_tile:
10174 case OMPD_unroll:
10175 case OMPD_sections:
10176 case OMPD_section:
10177 case OMPD_single:
10178 case OMPD_master:
10179 case OMPD_critical:
10180 case OMPD_taskyield:
10181 case OMPD_barrier:
10182 case OMPD_taskwait:
10183 case OMPD_taskgroup:
10184 case OMPD_atomic:
10185 case OMPD_flush:
10186 case OMPD_depobj:
10187 case OMPD_scan:
10188 case OMPD_teams:
10189 case OMPD_target_data:
10190 case OMPD_distribute:
10191 case OMPD_distribute_simd:
10192 case OMPD_distribute_parallel_for:
10193 case OMPD_distribute_parallel_for_simd:
10194 case OMPD_teams_distribute:
10195 case OMPD_teams_distribute_simd:
10196 case OMPD_teams_distribute_parallel_for:
10197 case OMPD_teams_distribute_parallel_for_simd:
10198 case OMPD_declare_simd:
10199 case OMPD_declare_variant:
10200 case OMPD_begin_declare_variant:
10201 case OMPD_end_declare_variant:
10202 case OMPD_declare_target:
10203 case OMPD_end_declare_target:
10204 case OMPD_declare_reduction:
10205 case OMPD_declare_mapper:
10206 case OMPD_taskloop:
10207 case OMPD_taskloop_simd:
10208 case OMPD_master_taskloop:
10209 case OMPD_master_taskloop_simd:
10210 case OMPD_parallel_master_taskloop:
10211 case OMPD_parallel_master_taskloop_simd:
10212 case OMPD_target:
10213 case OMPD_target_simd:
10214 case OMPD_target_teams_distribute:
10215 case OMPD_target_teams_distribute_simd:
10216 case OMPD_target_teams_distribute_parallel_for:
10217 case OMPD_target_teams_distribute_parallel_for_simd:
10218 case OMPD_target_teams:
10219 case OMPD_target_parallel:
10220 case OMPD_target_parallel_for:
10221 case OMPD_target_parallel_for_simd:
10222 case OMPD_requires:
10223 case OMPD_metadirective:
10224 case OMPD_unknown:
10225 default:
10226 llvm_unreachable("Unexpected standalone target data directive.");
10227 break;
10228 }
10229 if (HasNowait) {
10230 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10231 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10232 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10233 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10234 }
10235 CGF.EmitRuntimeCall(
10236 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10237 OffloadingArgs);
10238 };
10239
10240 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10241 &MapNamesArray](CodeGenFunction &CGF,
10242 PrePostActionTy &) {
10243 // Fill up the arrays with all the mapped variables.
10244 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10245 CGOpenMPRuntime::TargetDataInfo Info;
10246 MappableExprsHandler MEHandler(D, CGF);
10247 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10248 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10249 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10250
10251 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10252 D.hasClausesOfKind<OMPNowaitClause>();
10253
10254 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10255 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10256 CGF.VoidPtrTy, CGM.getPointerAlign());
10257 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10258 CGM.getPointerAlign());
10259 InputInfo.SizesArray =
10260 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10261 InputInfo.MappersArray =
10262 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10263 MapTypesArray = Info.RTArgs.MapTypesArray;
10264 MapNamesArray = Info.RTArgs.MapNamesArray;
10265 if (RequiresOuterTask)
10266 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10267 else
10268 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10269 };
10270
10271 if (IfCond) {
10272 emitIfClause(CGF, IfCond, TargetThenGen,
10273 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10274 } else {
10275 RegionCodeGenTy ThenRCG(TargetThenGen);
10276 ThenRCG(CGF);
10277 }
10278}
10279
10280namespace {
10281 /// Kind of parameter in a function with 'declare simd' directive.
10282enum ParamKindTy {
10283 Linear,
10284 LinearRef,
10285 LinearUVal,
10286 LinearVal,
10287 Uniform,
10288 Vector,
10289};
10290/// Attribute set of the parameter.
10291struct ParamAttrTy {
10292 ParamKindTy Kind = Vector;
10293 llvm::APSInt StrideOrArg;
10294 llvm::APSInt Alignment;
10295 bool HasVarStride = false;
10296};
10297} // namespace
10298
10299static unsigned evaluateCDTSize(const FunctionDecl *FD,
10300 ArrayRef<ParamAttrTy> ParamAttrs) {
10301 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10302 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10304 // of that clause. The VLEN value must be a power of 2.
10305 // Otherwise the notion of the function's "characteristic data type" (CDT)
10306 // is used to compute the vector length.
10306 // CDT is defined in the following order:
10307 // a) For non-void function, the CDT is the return type.
10308 // b) If the function has any non-uniform, non-linear parameters, then the
10309 // CDT is the type of the first such parameter.
10310 // c) If the CDT determined by a) or b) above is struct, union, or class
10311 // type which is pass-by-value (except for the type that maps to the
10312 // built-in complex data type), the characteristic data type is int.
10313 // d) If none of the above three cases is applicable, the CDT is int.
10314 // The VLEN is then determined based on the CDT and the size of vector
10315 // register of that ISA for which current vector version is generated. The
10316 // VLEN is computed using the formula below:
10317 // VLEN = sizeof(vector_register) / sizeof(CDT),
10318 // where the vector register size is specified in section 3.2.1, "Registers
10319 // and the Stack Frame", of the original AMD64 ABI document.
10320 QualType RetType = FD->getReturnType();
10321 if (RetType.isNull())
10322 return 0;
10323 ASTContext &C = FD->getASTContext();
10324 QualType CDT;
10325 if (!RetType.isNull() && !RetType->isVoidType()) {
10326 CDT = RetType;
10327 } else {
10328 unsigned Offset = 0;
10329 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10330 if (ParamAttrs[Offset].Kind == Vector)
10331 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10332 ++Offset;
10333 }
10334 if (CDT.isNull()) {
10335 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10336 if (ParamAttrs[I + Offset].Kind == Vector) {
10337 CDT = FD->getParamDecl(I)->getType();
10338 break;
10339 }
10340 }
10341 }
10342 }
10343 if (CDT.isNull())
10344 CDT = C.IntTy;
10345 CDT = CDT->getCanonicalTypeUnqualified();
10346 if (CDT->isRecordType() || CDT->isUnionType())
10347 CDT = C.IntTy;
10348 return C.getTypeSize(CDT);
10349}
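// Worked example (editorial, not from the source): for 'double f(double X,
// int K)' without a simdlen clause, the CDT is the return type double
// (64 bits), so the SSE variant with its 128-bit registers gets
// VLEN = 128 / 64 = 2, and the AVX512 variant gets VLEN = 512 / 64 = 8.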
10350
10351/// Mangle the parameter part of the vector function name according to
10352/// each parameter's OpenMP classification. The mangling function is defined
10353/// section 4.5 of the AAVFABI(2021Q1).
10354static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10355 SmallString<256> Buffer;
10356 llvm::raw_svector_ostream Out(Buffer);
10357 for (const auto &ParamAttr : ParamAttrs) {
10358 switch (ParamAttr.Kind) {
10359 case Linear:
10360 Out << 'l';
10361 break;
10362 case LinearRef:
10363 Out << 'R';
10364 break;
10365 case LinearUVal:
10366 Out << 'U';
10367 break;
10368 case LinearVal:
10369 Out << 'L';
10370 break;
10371 case Uniform:
10372 Out << 'u';
10373 break;
10374 case Vector:
10375 Out << 'v';
10376 break;
10377 }
10378 if (ParamAttr.HasVarStride)
10379 Out << "s" << ParamAttr.StrideOrArg;
10380 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10381 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10382 // Don't print the step value if it is not present or if it is
10383 // equal to 1.
10384 if (ParamAttr.StrideOrArg < 0)
10385 Out << 'n' << -ParamAttr.StrideOrArg;
10386 else if (ParamAttr.StrideOrArg != 1)
10387 Out << ParamAttr.StrideOrArg;
10388 }
10389
10390 if (!!ParamAttr.Alignment)
10391 Out << 'a' << ParamAttr.Alignment;
10392 }
10393
10394 return std::string(Out.str());
10395}
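// Worked example (editorial): for '#pragma omp declare simd uniform(A)
// linear(I : 2)' applied to 'void f(int *A, int I, float X)', the loop above
// emits "ul2v": 'u' for the uniform pointer, 'l2' for the linear parameter
// with step 2, and 'v' for the remaining vector parameter.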
10396
10397static void
10398emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10399 const llvm::APSInt &VLENVal,
10400 ArrayRef<ParamAttrTy> ParamAttrs,
10401 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10402 struct ISADataTy {
10403 char ISA;
10404 unsigned VecRegSize;
10405 };
10406 ISADataTy ISAData[] = {
10407 {'b', 128}, // SSE
10410 {'c', 256}, // AVX
10413 {'d', 256}, // AVX2
10416 {'e', 512}, // AVX512
10419 };
10420 llvm::SmallVector<char, 2> Masked;
10421 switch (State) {
10422 case OMPDeclareSimdDeclAttr::BS_Undefined:
10423 Masked.push_back('N');
10424 Masked.push_back('M');
10425 break;
10426 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10427 Masked.push_back('N');
10428 break;
10429 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10430 Masked.push_back('M');
10431 break;
10432 }
10433 for (char Mask : Masked) {
10434 for (const ISADataTy &Data : ISAData) {
10435 SmallString<256> Buffer;
10436 llvm::raw_svector_ostream Out(Buffer);
10437 Out << "_ZGV" << Data.ISA << Mask;
10438 if (!VLENVal) {
10439 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10440 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10441 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10442 } else {
10443 Out << VLENVal;
10444 }
10445 Out << mangleVectorParameters(ParamAttrs);
10446 Out << '_' << Fn->getName();
10447 Fn->addFnAttr(Out.str());
10448 }
10449 }
10450}
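// Worked example (editorial): '#pragma omp declare simd simdlen(4)
// notinbranch' on 'double g(double)' adds the unmasked ('N') attribute names
// _ZGVbN4v_g, _ZGVcN4v_g, _ZGVdN4v_g and _ZGVeN4v_g, one per ISA entry above.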
10451
10452// These are the functions needed to mangle the names of the
10453// vector functions generated by the compiler, according to the rules
10454// defined in the "Vector Function ABI specifications for AArch64",
10455// available at
10456// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10457
10458/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10459static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10460 QT = QT.getCanonicalType();
10461
10462 if (QT->isVoidType())
10463 return false;
10464
10465 if (Kind == ParamKindTy::Uniform)
10466 return false;
10467
10468 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10469 return false;
10470
10471 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10472 !QT->isReferenceType())
10473 return false;
10474
10475 return true;
10476}
10477
10478/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10479static bool getAArch64PBV(QualType QT, ASTContext &C) {
10480 QT = QT.getCanonicalType();
10481 unsigned Size = C.getTypeSize(QT);
10482
10483 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10484 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10485 return false;
10486
10487 if (QT->isFloatingType())
10488 return true;
10489
10490 if (QT->isIntegerType())
10491 return true;
10492
10493 if (QT->isPointerType())
10494 return true;
10495
10496 // TODO: Add support for complex types (section 3.1.2, item 2).
10497
10498 return false;
10499}
10500
10501/// Computes the lane size (LS) of a return type or of an input parameter,
10502/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10503/// TODO: Add support for references, section 3.2.1, item 1.
10504static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10505 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10506 QualType PTy = QT.getCanonicalType()->getPointeeType();
10507 if (getAArch64PBV(PTy, C))
10508 return C.getTypeSize(PTy);
10509 }
10510 if (getAArch64PBV(QT, C))
10511 return C.getTypeSize(QT);
10512
10513 return C.getTypeSize(C.getUIntPtrType());
10514}
10515
10516// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10517// signature of the scalar function, as defined in 3.2.2 of the
10518// AAVFABI.
10519static std::tuple<unsigned, unsigned, bool>
10520getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10521 QualType RetType = FD->getReturnType().getCanonicalType();
10522
10523 ASTContext &C = FD->getASTContext();
10524
10525 bool OutputBecomesInput = false;
10526 llvm::SmallVector<unsigned, 8> Sizes;
10526
10528 if (!RetType->isVoidType()) {
10529 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10530 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10531 OutputBecomesInput = true;
10532 }
10533 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10534 QualType QT = FD->getParamDecl(I)->getType();
10535 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10536 }
10537
10538 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10539 // The LS of a function parameter / return value can only be a power
10540 // of 2, starting from 8 bits, up to 128.
10541 assert(llvm::all_of(Sizes,
10542 [](unsigned Size) {
10543 return Size == 8 || Size == 16 || Size == 32 ||
10544 Size == 64 || Size == 128;
10545 }) &&
10546 "Invalid size");
10547
10548 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10549 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10550 OutputBecomesInput);
10551}
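// Worked example (editorial note): for "double foo(float x, double y)" the
// lane sizes are 64 (return value), 32 (x) and 64 (y), so getNDSWDS returns
// NDS = 32 and WDS = 64; OutputBecomesInput stays false because the double
// return value is PBV.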
10552
10553// Function used to add the attribute. The parameter `VLEN` is
10554// templated to allow the use of "x" when targeting scalable functions
10555// for SVE.
10556template <typename T>
10557static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10558 char ISA, StringRef ParSeq,
10559 StringRef MangledName, bool OutputBecomesInput,
10560 llvm::Function *Fn) {
10561 SmallString<256> Buffer;
10562 llvm::raw_svector_ostream Out(Buffer);
10563 Out << Prefix << ISA << LMask << VLEN;
10564 if (OutputBecomesInput)
10565 Out << "v";
10566 Out << ParSeq << "_" << MangledName;
10567 Fn->addFnAttr(Out.str());
10568}
10569
10570// Helper function to generate the Advanced SIMD names depending on
10571// the value of the NDS when simdlen is not present.
10572static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10573 StringRef Prefix, char ISA,
10574 StringRef ParSeq, StringRef MangledName,
10575 bool OutputBecomesInput,
10576 llvm::Function *Fn) {
10577 switch (NDS) {
10578 case 8:
10579 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10580 OutputBecomesInput, Fn);
10581 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10582 OutputBecomesInput, Fn);
10583 break;
10584 case 16:
10585 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10586 OutputBecomesInput, Fn);
10587 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10588 OutputBecomesInput, Fn);
10589 break;
10590 case 32:
10591 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10592 OutputBecomesInput, Fn);
10593 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10594 OutputBecomesInput, Fn);
10595 break;
10596 case 64:
10597 case 128:
10598 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10599 OutputBecomesInput, Fn);
10600 break;
10601 default:
10602 llvm_unreachable("Scalar type is too wide.");
10603 }
10604}
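// Editorial summary of the switch above: each NDS yields the 64-bit and
// 128-bit Advanced SIMD widths of section 3.3.1 of the AAVFABI, e.g.
// NDS = 32 produces VLEN 2 (64-bit) and VLEN 4 (128-bit) variants, while
// NDS = 64 or 128 only fits VLEN 2.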
10605
10606/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10607static void emitAArch64DeclareSimdFunction(
10608 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10609 ArrayRef<ParamAttrTy> ParamAttrs,
10610 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10611 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10612
10613 // Get basic data for building the vector signature.
10614 const auto Data = getNDSWDS(FD, ParamAttrs);
10615 const unsigned NDS = std::get<0>(Data);
10616 const unsigned WDS = std::get<1>(Data);
10617 const bool OutputBecomesInput = std::get<2>(Data);
10618
10619 // Check the values provided via `simdlen` by the user.
10620 // 1. A `simdlen(1)` doesn't produce vector signatures,
10621 if (UserVLEN == 1) {
10622 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10623 DiagnosticsEngine::Warning,
10624 "The clause simdlen(1) has no effect when targeting aarch64.");
10625 CGM.getDiags().Report(SLoc, DiagID);
10626 return;
10627 }
10628
10629 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10630 // Advanced SIMD output.
10631 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10632 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10633 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10634 "power of 2 when targeting Advanced SIMD.");
10635 CGM.getDiags().Report(SLoc, DiagID);
10636 return;
10637 }
10638
10639 // 3. Section 3.4.1. SVE fixed length must obey the architectural
10640 // limits.
10641 if (ISA == 's' && UserVLEN != 0) {
10642 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10643 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10644 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10645 "lanes in the architectural constraints "
10646 "for SVE (min is 128-bit, max is "
10647 "2048-bit, by steps of 128-bit)");
10648 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10649 return;
10650 }
10651 }
10652
10653 // Sort out parameter sequence.
10654 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10655 StringRef Prefix = "_ZGV";
10656 // Generate simdlen from user input (if any).
10657 if (UserVLEN) {
10658 if (ISA == 's') {
10659 // SVE generates only a masked function.
10660 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10661 OutputBecomesInput, Fn);
10662 } else {
10663 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10664 // Advanced SIMD generates one or two functions, depending on
10665 // the `[not]inbranch` clause.
10666 switch (State) {
10667 case OMPDeclareSimdDeclAttr::BS_Undefined:
10668 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10669 OutputBecomesInput, Fn);
10670 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10671 OutputBecomesInput, Fn);
10672 break;
10673 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10674 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10675 OutputBecomesInput, Fn);
10676 break;
10677 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10678 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10679 OutputBecomesInput, Fn);
10680 break;
10681 }
10682 }
10683 } else {
10684 // If no user simdlen is provided, follow the AAVFABI rules for
10685 // generating the vector length.
10686 if (ISA == 's') {
10687 // SVE, section 3.4.1, item 1.
10688 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10689 OutputBecomesInput, Fn);
10690 } else {
10691 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10692 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10693 // two vector names depending on the use of the clause
10694 // `[not]inbranch`.
10695 switch (State) {
10696 case OMPDeclareSimdDeclAttr::BS_Undefined:
10697 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10698 OutputBecomesInput, Fn);
10699 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10700 OutputBecomesInput, Fn);
10701 break;
10702 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10703 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10704 OutputBecomesInput, Fn);
10705 break;
10706 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10707 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10708 OutputBecomesInput, Fn);
10709 break;
10710 }
10711 }
10712 }
10713}
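// Illustrative example (editorial sketch, not part of the upstream source):
// for
//   #pragma omp declare simd
//   double foo(double x);
// an SVE target gets the scalable variant "_ZGVsMxv_foo"; an Advanced
// SIMD-only target (NDS = 64, no simdlen, no branch-state clause) gets both
// the unmasked and masked fixed-length variants "_ZGVnN2v_foo" and
// "_ZGVnM2v_foo".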
10714
10715void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10716 llvm::Function *Fn) {
10717 ASTContext &C = CGM.getContext();
10718 FD = FD->getMostRecentDecl();
10719 while (FD) {
10720 // Map params to their positions in function decl.
10721 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10722 if (isa<CXXMethodDecl>(FD))
10723 ParamPositions.try_emplace(FD, 0);
10724 unsigned ParamPos = ParamPositions.size();
10725 for (const ParmVarDecl *P : FD->parameters()) {
10726 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10727 ++ParamPos;
10728 }
10729 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10730 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10731 // Mark uniform parameters.
10732 for (const Expr *E : Attr->uniforms()) {
10733 E = E->IgnoreParenImpCasts();
10734 unsigned Pos;
10735 if (isa<CXXThisExpr>(E)) {
10736 Pos = ParamPositions[FD];
10737 } else {
10738 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10739 ->getCanonicalDecl();
10740 auto It = ParamPositions.find(PVD);
10741 assert(It != ParamPositions.end() && "Function parameter not found");
10742 Pos = It->second;
10743 }
10744 ParamAttrs[Pos].Kind = Uniform;
10745 }
10746 // Get alignment info.
10747 auto *NI = Attr->alignments_begin();
10748 for (const Expr *E : Attr->aligneds()) {
10749 E = E->IgnoreParenImpCasts();
10750 unsigned Pos;
10751 QualType ParmTy;
10752 if (isa<CXXThisExpr>(E)) {
10753 Pos = ParamPositions[FD];
10754 ParmTy = E->getType();
10755 } else {
10756 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10757 ->getCanonicalDecl();
10758 auto It = ParamPositions.find(PVD);
10759 assert(It != ParamPositions.end() && "Function parameter not found");
10760 Pos = It->second;
10761 ParmTy = PVD->getType();
10762 }
10763 ParamAttrs[Pos].Alignment =
10764 (*NI)
10765 ? (*NI)->EvaluateKnownConstInt(C)
10766 : llvm::APSInt::getUnsigned(
10767 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10768 .getQuantity());
10769 ++NI;
10770 }
10771 // Mark linear parameters.
10772 auto *SI = Attr->steps_begin();
10773 auto *MI = Attr->modifiers_begin();
10774 for (const Expr *E : Attr->linears()) {
10775 E = E->IgnoreParenImpCasts();
10776 unsigned Pos;
10777 bool IsReferenceType = false;
10778 // Rescaling factor needed to compute the linear parameter
10779 // value in the mangled name.
10780 unsigned PtrRescalingFactor = 1;
10781 if (isa<CXXThisExpr>(E)) {
10782 Pos = ParamPositions[FD];
10783 auto *P = cast<PointerType>(E->getType());
10784 PtrRescalingFactor = CGM.getContext()
10785 .getTypeSizeInChars(P->getPointeeType())
10786 .getQuantity();
10787 } else {
10788 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10789 ->getCanonicalDecl();
10790 auto It = ParamPositions.find(PVD);
10791 assert(It != ParamPositions.end() && "Function parameter not found");
10792 Pos = It->second;
10793 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10794 PtrRescalingFactor = CGM.getContext()
10795 .getTypeSizeInChars(P->getPointeeType())
10796 .getQuantity();
10797 else if (PVD->getType()->isReferenceType()) {
10798 IsReferenceType = true;
10799 PtrRescalingFactor =
10800 CGM.getContext()
10801 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
10802 .getQuantity();
10803 }
10804 }
10805 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10806 if (*MI == OMPC_LINEAR_ref)
10807 ParamAttr.Kind = LinearRef;
10808 else if (*MI == OMPC_LINEAR_uval)
10809 ParamAttr.Kind = LinearUVal;
10810 else if (IsReferenceType)
10811 ParamAttr.Kind = LinearVal;
10812 else
10813 ParamAttr.Kind = Linear;
10814 // Assuming a stride of 1, for `linear` without modifiers.
10815 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10816 if (*SI) {
10817 Expr::EvalResult Result;
10818 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10819 if (const auto *DRE =
10820 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10821 if (const auto *StridePVD =
10822 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
10823 ParamAttr.HasVarStride = true;
10824 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
10825 assert(It != ParamPositions.end() &&
10826 "Function parameter not found");
10827 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
10828 }
10829 }
10830 } else {
10831 ParamAttr.StrideOrArg = Result.Val.getInt();
10832 }
10833 }
10834 // If we are using a linear clause on a pointer, we need to
10835 // rescale the value of linear_step with the byte size of the
10836 // pointee type.
10837 if (!ParamAttr.HasVarStride &&
10838 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
10839 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10840 ++SI;
10841 ++MI;
10842 }
10843 llvm::APSInt VLENVal;
10844 SourceLocation ExprLoc;
10845 const Expr *VLENExpr = Attr->getSimdlen();
10846 if (VLENExpr) {
10847 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10848 ExprLoc = VLENExpr->getExprLoc();
10849 }
10850 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10851 if (CGM.getTriple().isX86()) {
10852 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10853 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10854 unsigned VLEN = VLENVal.getExtValue();
10855 StringRef MangledName = Fn->getName();
10856 if (CGM.getTarget().hasFeature("sve"))
10857 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10858 MangledName, 's', 128, Fn, ExprLoc);
10859 else if (CGM.getTarget().hasFeature("neon"))
10860 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10861 MangledName, 'n', 128, Fn, ExprLoc);
10862 }
10863 }
10864 FD = FD->getPreviousDecl();
10865 }
10866}
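// End-to-end example (editorial sketch): a declaration such as
//   #pragma omp declare simd uniform(p) linear(i:4) aligned(p:32)
//   void bar(double *p, int i);
// yields ParamAttrs where 'p' is Uniform with Alignment 32 and 'i' is
// Linear with StrideOrArg 4; the target hooks above then turn these into
// the corresponding "_ZGV..." vector-variant attributes on the function.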
10867
10868namespace {
10869/// Cleanup action for doacross support.
10870class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10871public:
10872 static const int DoacrossFinArgs = 2;
10873
10874private:
10875 llvm::FunctionCallee RTLFn;
10876 llvm::Value *Args[DoacrossFinArgs];
10877
10878public:
10879 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10880 ArrayRef<llvm::Value *> CallArgs)
10881 : RTLFn(RTLFn) {
10882 assert(CallArgs.size() == DoacrossFinArgs);
10883 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10884 }
10885 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10886 if (!CGF.HaveInsertPoint())
10887 return;
10888 CGF.EmitRuntimeCall(RTLFn, Args);
10889 }
10890};
10891} // namespace
10892
10893void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10894 const OMPLoopDirective &D,
10895 ArrayRef<Expr *> NumIterations) {
10896 if (!CGF.HaveInsertPoint())
10897 return;
10898
10899 ASTContext &C = CGM.getContext();
10900 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10901 RecordDecl *RD;
10902 if (KmpDimTy.isNull()) {
10903 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
10904 // kmp_int64 lo; // lower
10905 // kmp_int64 up; // upper
10906 // kmp_int64 st; // stride
10907 // };
10908 RD = C.buildImplicitRecord("kmp_dim");
10909 RD->startDefinition();
10910 addFieldToRecordDecl(C, RD, Int64Ty);
10911 addFieldToRecordDecl(C, RD, Int64Ty);
10912 addFieldToRecordDecl(C, RD, Int64Ty);
10913 RD->completeDefinition();
10914 KmpDimTy = C.getRecordType(RD);
10915 } else {
10916 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10917 }
10918 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10919 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
10920 ArraySizeModifier::Normal, 0);
10921
10922 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10923 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10924 enum { LowerFD = 0, UpperFD, StrideFD };
10925 // Fill dims with data.
10926 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10927 LValue DimsLVal = CGF.MakeAddrLValue(
10928 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10929 // dims.upper = num_iterations;
10930 LValue UpperLVal = CGF.EmitLValueForField(
10931 DimsLVal, *std::next(RD->field_begin(), UpperFD));
10932 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
10933 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
10934 Int64Ty, NumIterations[I]->getExprLoc());
10935 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10936 // dims.stride = 1;
10937 LValue StrideLVal = CGF.EmitLValueForField(
10938 DimsLVal, *std::next(RD->field_begin(), StrideFD));
10939 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10940 StrideLVal);
10941 }
10942
10943 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10944 // kmp_int32 num_dims, struct kmp_dim * dims);
10945 llvm::Value *Args[] = {
10946 emitUpdateLocation(CGF, D.getBeginLoc()),
10947 getThreadID(CGF, D.getBeginLoc()),
10948 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10949 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10950 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
10951 CGM.VoidPtrTy)};
10952
10953 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10954 CGM.getModule(), OMPRTL___kmpc_doacross_init);
10955 CGF.EmitRuntimeCall(RTLFn, Args);
10956 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10957 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10958 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10959 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
10960 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10961 llvm::ArrayRef(FiniArgs));
10962}
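// Usage sketch (editorial note): a worksharing loop such as
//   #pragma omp for ordered(1)
//   for (int i = 0; i < n; ++i) { ... }
// reaches this point with a single iteration-count expression, so one
// kmp_dim entry {lo, up, st} is filled in and passed to
// __kmpc_doacross_init, with __kmpc_doacross_fini registered as the
// matching cleanup.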
10963
10964template <typename T>
10965static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
10966 const T *C, llvm::Value *ULoc,
10967 llvm::Value *ThreadID) {
10968 QualType Int64Ty =
10969 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10970 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10971 QualType ArrayTy = CGM.getContext().getConstantArrayType(
10972 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
10973 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10974 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10975 const Expr *CounterVal = C->getLoopData(I);
10976 assert(CounterVal);
10977 llvm::Value *CntVal = CGF.EmitScalarConversion(
10978 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10979 CounterVal->getExprLoc());
10980 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10981 /*Volatile=*/false, Int64Ty);
10982 }
10983 llvm::Value *Args[] = {
10984 ULoc, ThreadID,
10985 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
10986 llvm::FunctionCallee RTLFn;
10987 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
10988 OMPDoacrossKind<T> ODK;
10989 if (ODK.isSource(C)) {
10990 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10991 OMPRTL___kmpc_doacross_post);
10992 } else {
10993 assert(ODK.isSink(C) && "Expect sink modifier.");
10994 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10995 OMPRTL___kmpc_doacross_wait);
10996 }
10997 CGF.EmitRuntimeCall(RTLFn, Args);
10998}
10999
11000void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11001 const OMPDependClause *C) {
11002 return EmitDoacrossOrdered<OMPDependClause>(
11003 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11004 getThreadID(CGF, C->getBeginLoc()));
11005}
11006
11007void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11008 const OMPDoacrossClause *C) {
11009 return EmitDoacrossOrdered<OMPDoacrossClause>(
11010 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11011 getThreadID(CGF, C->getBeginLoc()));
11012}
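// Usage sketch (editorial note): inside the ordered(1) loop body,
//   #pragma omp ordered depend(sink: i - 1)  // waits via __kmpc_doacross_wait
//   #pragma omp ordered depend(source)       // posts via __kmpc_doacross_post
// both forms funnel through EmitDoacrossOrdered above, with the loop datum
// converted to kmp_int64 first.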
11013
11014void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11015 llvm::FunctionCallee Callee,
11016 ArrayRef<llvm::Value *> Args) const {
11017 assert(Loc.isValid() && "Outlined function call location must be valid.");
11018 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11019
11020 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11021 if (Fn->doesNotThrow()) {
11022 CGF.EmitNounwindRuntimeCall(Fn, Args);
11023 return;
11024 }
11025 }
11026 CGF.EmitRuntimeCall(Callee, Args);
11027}
11028
11029void CGOpenMPRuntime::emitOutlinedFunctionCall(
11030 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11031 ArrayRef<llvm::Value *> Args) const {
11032 emitCall(CGF, Loc, OutlinedFn, Args);
11033}
11034
11035void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11036 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11037 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11038 HasEmittedDeclareTargetRegion = true;
11039}
11040
11041Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11042 const VarDecl *NativeParam,
11043 const VarDecl *TargetParam) const {
11044 return CGF.GetAddrOfLocalVar(NativeParam);
11045}
11046
11047/// Return allocator value from expression, or return a null allocator (default
11048/// when no allocator specified).
11049static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11050 const Expr *Allocator) {
11051 llvm::Value *AllocVal;
11052 if (Allocator) {
11053 AllocVal = CGF.EmitScalarExpr(Allocator);
11054 // According to the standard, the original allocator type is an enum
11055 // (integer). Convert to pointer type, if required.
11056 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11057 CGF.getContext().VoidPtrTy,
11058 Allocator->getExprLoc());
11059 } else {
11060 // If no allocator specified, it defaults to the null allocator.
11061 AllocVal = llvm::Constant::getNullValue(
11062 CGF.CGM.getTypes().ConvertTypeForMem(CGF.getContext().VoidPtrTy));
11063 }
11064 return AllocVal;
11065}
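// Editorial note: e.g. "allocator(omp_high_bw_mem_alloc)" yields the enum
// constant converted to a void pointer, while a plain "#pragma omp
// allocate(x)" without an allocator clause falls back to the null (default)
// allocator handled by the runtime.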
11066
11067/// Return the alignment from an allocate directive if present.
11068static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11069 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11070
11071 if (!AllocateAlignment)
11072 return nullptr;
11073
11074 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11075}
11076
11077Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11078 const VarDecl *VD) {
11079 if (!VD)
11080 return Address::invalid();
11081 Address UntiedAddr = Address::invalid();
11082 Address UntiedRealAddr = Address::invalid();
11083 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11084 if (It != FunctionToUntiedTaskStackMap.end()) {
11085 const UntiedLocalVarsAddressesMap &UntiedData =
11086 UntiedLocalVarsStack[It->second];
11087 auto I = UntiedData.find(VD);
11088 if (I != UntiedData.end()) {
11089 UntiedAddr = I->second.first;
11090 UntiedRealAddr = I->second.second;
11091 }
11092 }
11093 const VarDecl *CVD = VD->getCanonicalDecl();
11094 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11095 // Use the default allocation.
11096 if (!isAllocatableDecl(VD))
11097 return UntiedAddr;
11098 llvm::Value *Size;
11099 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11100 if (CVD->getType()->isVariablyModifiedType()) {
11101 Size = CGF.getTypeSize(CVD->getType());
11102 // Align the size: ((size + align - 1) / align) * align
11103 Size = CGF.Builder.CreateNUWAdd(
11104 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11105 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11106 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11107 } else {
11108 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11109 Size = CGM.getSize(Sz.alignTo(Align));
11110 }
11111 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11112 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11113 const Expr *Allocator = AA->getAllocator();
11114 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11115 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11116 SmallVector<llvm::Value *, 4> Args;
11117 Args.push_back(ThreadID);
11118 if (Alignment)
11119 Args.push_back(Alignment);
11120 Args.push_back(Size);
11121 Args.push_back(AllocVal);
11122 llvm::omp::RuntimeFunction FnID =
11123 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11124 llvm::Value *Addr = CGF.EmitRuntimeCall(
11125 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11126 getName({CVD->getName(), ".void.addr"}));
11127 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11128 CGM.getModule(), OMPRTL___kmpc_free);
11129 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11130 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11131 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11132 if (UntiedAddr.isValid())
11133 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11134
11135 // Cleanup action for allocate support.
11136 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11137 llvm::FunctionCallee RTLFn;
11138 SourceLocation::UIntTy LocEncoding;
11139 Address Addr;
11140 const Expr *AllocExpr;
11141
11142 public:
11143 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11144 SourceLocation::UIntTy LocEncoding, Address Addr,
11145 const Expr *AllocExpr)
11146 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11147 AllocExpr(AllocExpr) {}
11148 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11149 if (!CGF.HaveInsertPoint())
11150 return;
11151 llvm::Value *Args[3];
11152 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11153 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11154 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11155 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11156 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11157 Args[2] = AllocVal;
11158 CGF.EmitRuntimeCall(RTLFn, Args);
11159 }
11160 };
11161 Address VDAddr =
11162 UntiedRealAddr.isValid()
11163 ? UntiedRealAddr
11164 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11165 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11166 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11167 VDAddr, Allocator);
11168 if (UntiedRealAddr.isValid())
11169 if (auto *Region =
11170 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11171 Region->emitUntiedSwitch(CGF);
11172 return VDAddr;
11173 }
11174 return UntiedAddr;
11175}
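// Usage sketch (editorial note): for a local variable declared as
//   int buf[64];
//   #pragma omp allocate(buf) allocator(omp_large_cap_mem_alloc) align(64)
// this routine emits __kmpc_aligned_alloc(gtid, 64, sizeof(buf), allocator)
// (plain __kmpc_alloc without the align clause) and pushes a cleanup that
// releases the memory with __kmpc_free when the scope ends.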
11176
11177bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11178 const VarDecl *VD) const {
11179 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11180 if (It == FunctionToUntiedTaskStackMap.end())
11181 return false;
11182 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11183}
11184
11185CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11186 CodeGenModule &CGM, const OMPLoopDirective &S)
11187 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11188 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11189 if (!NeedToPush)
11190 return;
11191 NontemporalDeclsSet &DS =
11192 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11193 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11194 for (const Stmt *Ref : C->private_refs()) {
11195 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11196 const ValueDecl *VD;
11197 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11198 VD = DRE->getDecl();
11199 } else {
11200 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11201 assert((ME->isImplicitCXXThis() ||
11202 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11203 "Expected member of current class.");
11204 VD = ME->getMemberDecl();
11205 }
11206 DS.insert(VD);
11207 }
11208 }
11209}
11210
11211CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11212 if (!NeedToPush)
11213 return;
11214 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11215}
11216
11217CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11218 CodeGenFunction &CGF,
11219 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11220 std::pair<Address, Address>> &LocalVars)
11221 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11222 if (!NeedToPush)
11223 return;
11224 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11225 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11226 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11227}
11228
11229CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11230 if (!NeedToPush)
11231 return;
11232 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11233}
11234
11235bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11236 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11237
11238 return llvm::any_of(
11239 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11240 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11241}
11242
11243void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11244 const OMPExecutableDirective &S,
11245 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11246 const {
11247 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11248 // Vars in target/task regions must be excluded completely.
11249 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11250 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11251 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11252 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11253 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11254 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11255 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11256 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11257 }
11258 }
11259 // Exclude vars in private clauses.
11260 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11261 for (const Expr *Ref : C->varlist()) {
11262 if (!Ref->getType()->isScalarType())
11263 continue;
11264 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11265 if (!DRE)
11266 continue;
11267 NeedToCheckForLPCs.insert(DRE->getDecl());
11268 }
11269 }
11270 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11271 for (const Expr *Ref : C->varlist()) {
11272 if (!Ref->getType()->isScalarType())
11273 continue;
11274 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11275 if (!DRE)
11276 continue;
11277 NeedToCheckForLPCs.insert(DRE->getDecl());
11278 }
11279 }
11280 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11281 for (const Expr *Ref : C->varlist()) {
11282 if (!Ref->getType()->isScalarType())
11283 continue;
11284 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11285 if (!DRE)
11286 continue;
11287 NeedToCheckForLPCs.insert(DRE->getDecl());
11288 }
11289 }
11290 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11291 for (const Expr *Ref : C->varlist()) {
11292 if (!Ref->getType()->isScalarType())
11293 continue;
11294 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11295 if (!DRE)
11296 continue;
11297 NeedToCheckForLPCs.insert(DRE->getDecl());
11298 }
11299 }
11300 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11301 for (const Expr *Ref : C->varlist()) {
11302 if (!Ref->getType()->isScalarType())
11303 continue;
11304 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11305 if (!DRE)
11306 continue;
11307 NeedToCheckForLPCs.insert(DRE->getDecl());
11308 }
11309 }
11310 for (const Decl *VD : NeedToCheckForLPCs) {
11311 for (const LastprivateConditionalData &Data :
11312 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11313 if (Data.DeclToUniqueName.count(VD) > 0) {
11314 if (!Data.Disabled)
11315 NeedToAddForLPCsAsDisabled.insert(VD);
11316 break;
11317 }
11318 }
11319 }
11320}
11321
11322CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11323 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11324 : CGM(CGF.CGM),
11325 Action((CGM.getLangOpts().OpenMP >= 50 &&
11326 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11327 [](const OMPLastprivateClause *C) {
11328 return C->getKind() ==
11329 OMPC_LASTPRIVATE_conditional;
11330 }))
11331 ? ActionToDo::PushAsLastprivateConditional
11332 : ActionToDo::DoNotPush) {
11333 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11334 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11335 return;
11336 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11337 "Expected a push action.");
11338 LastprivateConditionalData &Data =
11339 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11340 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11341 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11342 continue;
11343
11344 for (const Expr *Ref : C->varlist()) {
11345 Data.DeclToUniqueName.insert(std::make_pair(
11346 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11347 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11348 }
11349 }
11350 Data.IVLVal = IVLVal;
11351 Data.Fn = CGF.CurFn;
11352}
11353
11354CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11355 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11356 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11357 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11358 if (CGM.getLangOpts().OpenMP < 50)
11359 return;
11360 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11361 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11362 if (!NeedToAddForLPCsAsDisabled.empty()) {
11363 Action = ActionToDo::DisableLastprivateConditional;
11364 LastprivateConditionalData &Data =
11365 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11366 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11367 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11368 Data.Fn = CGF.CurFn;
11369 Data.Disabled = true;
11370 }
11371}
11372
11373CGOpenMPRuntime::LastprivateConditionalRAII
11374CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11375 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11376 return LastprivateConditionalRAII(CGF, S);
11377}
11378
11379CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11380 if (CGM.getLangOpts().OpenMP < 50)
11381 return;
11382 if (Action == ActionToDo::DisableLastprivateConditional) {
11383 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11384 "Expected list of disabled private vars.");
11385 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11386 }
11387 if (Action == ActionToDo::PushAsLastprivateConditional) {
11388 assert(
11389 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11390 "Expected list of lastprivate conditional vars.");
11391 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11392 }
11393}
11394
11395Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11396 const VarDecl *VD) {
11397 ASTContext &C = CGM.getContext();
11398 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11399 QualType NewType;
11400 const FieldDecl *VDField;
11401 const FieldDecl *FiredField;
11402 LValue BaseLVal;
11403 auto VI = I->getSecond().find(VD);
11404 if (VI == I->getSecond().end()) {
11405 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11406 RD->startDefinition();
11407 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11408 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11409 RD->completeDefinition();
11410 NewType = C.getRecordType(RD);
11411 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11412 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11413 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11414 } else {
11415 NewType = std::get<0>(VI->getSecond());
11416 VDField = std::get<1>(VI->getSecond());
11417 FiredField = std::get<2>(VI->getSecond());
11418 BaseLVal = std::get<3>(VI->getSecond());
11419 }
11420 LValue FiredLVal =
11421 CGF.EmitLValueForField(BaseLVal, FiredField);
11422 CGF.EmitStoreOfScalar(
11423 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11424 FiredLVal);
11425 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11426}
11427
11428namespace {
11429/// Checks if the lastprivate conditional variable is referenced in LHS.
11430class LastprivateConditionalRefChecker final
11431 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11433 const Expr *FoundE = nullptr;
11434 const Decl *FoundD = nullptr;
11435 StringRef UniqueDeclName;
11436 LValue IVLVal;
11437 llvm::Function *FoundFn = nullptr;
11438 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11439
11440public:
11441 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11442 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11443 llvm::reverse(LPM)) {
11444 auto It = D.DeclToUniqueName.find(E->getDecl());
11445 if (It == D.DeclToUniqueName.end())
11446 continue;
11447 if (D.Disabled)
11448 return false;
11449 FoundE = E;
11450 FoundD = E->getDecl()->getCanonicalDecl();
11451 UniqueDeclName = It->second;
11452 IVLVal = D.IVLVal;
11453 FoundFn = D.Fn;
11454 break;
11455 }
11456 return FoundE == E;
11457 }
11458 bool VisitMemberExpr(const MemberExpr *E) {
11459 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11460 return false;
11461 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11462 llvm::reverse(LPM)) {
11463 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11464 if (It == D.DeclToUniqueName.end())
11465 continue;
11466 if (D.Disabled)
11467 return false;
11468 FoundE = E;
11469 FoundD = E->getMemberDecl()->getCanonicalDecl();
11470 UniqueDeclName = It->second;
11471 IVLVal = D.IVLVal;
11472 FoundFn = D.Fn;
11473 break;
11474 }
11475 return FoundE == E;
11476 }
11477 bool VisitStmt(const Stmt *S) {
11478 for (const Stmt *Child : S->children()) {
11479 if (!Child)
11480 continue;
11481 if (const auto *E = dyn_cast<Expr>(Child))
11482 if (!E->isGLValue())
11483 continue;
11484 if (Visit(Child))
11485 return true;
11486 }
11487 return false;
11488 }
11489 explicit LastprivateConditionalRefChecker(
11490 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11491 : LPM(LPM) {}
11492 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11493 getFoundData() const {
11494 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11495 }
11496};
11497} // namespace
11498
11499void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11500 LValue IVLVal,
11501 StringRef UniqueDeclName,
11502 LValue LVal,
11503 SourceLocation Loc) {
11504 // Last updated loop counter for the lastprivate conditional var.
11505 // int<xx> last_iv = 0;
11506 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11507 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11508 LLIVTy, getName({UniqueDeclName, "iv"}));
11509 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11510 IVLVal.getAlignment().getAsAlign());
11511 LValue LastIVLVal =
11512 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11513
11514 // Last value of the lastprivate conditional.
11515 // decltype(priv_a) last_a;
11516 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11517 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11518 cast<llvm::GlobalVariable>(Last)->setAlignment(
11519 LVal.getAlignment().getAsAlign());
11520 LValue LastLVal =
11521 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11522
11523 // Global loop counter. Required to handle inner parallel-for regions.
11524 // iv
11525 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11526
11527 // #pragma omp critical(a)
11528 // if (last_iv <= iv) {
11529 // last_iv = iv;
11530 // last_a = priv_a;
11531 // }
11532 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11533 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11534 Action.Enter(CGF);
11535 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11536 // (last_iv <= iv) ? Check if the variable is updated and store new
11537 // value in global var.
11538 llvm::Value *CmpRes;
11539 if (IVLVal.getType()->isSignedIntegerType()) {
11540 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11541 } else {
11542 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11543 "Loop iteration variable must be integer.");
11544 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11545 }
11546 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11547 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11548 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11549 // {
11550 CGF.EmitBlock(ThenBB);
11551
11552 // last_iv = iv;
11553 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11554
11555 // last_a = priv_a;
11556 switch (CGF.getEvaluationKind(LVal.getType())) {
11557 case TEK_Scalar: {
11558 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11559 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11560 break;
11561 }
11562 case TEK_Complex: {
11563 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11564 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11565 break;
11566 }
11567 case TEK_Aggregate:
11568 llvm_unreachable(
11569 "Aggregates are not supported in lastprivate conditional.");
11570 }
11571 // }
11572 CGF.EmitBranch(ExitBB);
11573 // There is no need to emit line number for unconditional branch.
11574 (void)ApplyDebugLocation::CreateEmpty(CGF);
11575 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11576 };
11577
11578 if (CGM.getLangOpts().OpenMPSimd) {
11579 // Do not emit as a critical region as no parallel region could be emitted.
11580 RegionCodeGenTy ThenRCG(CodeGen);
11581 ThenRCG(CGF);
11582 } else {
11583 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11584 }
11585}
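// Usage sketch (editorial note): for
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < n; ++i) if (p[i]) x = i;
// every store to 'x' funnels into this helper, which maintains global
// "pl_cond" copies of the value and of the highest iteration that wrote it,
// guarded by a critical section unless only simd codegen is requested.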
11586
11587void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11588 const Expr *LHS) {
11589 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11590 return;
11591 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11592 if (!Checker.Visit(LHS))
11593 return;
11594 const Expr *FoundE;
11595 const Decl *FoundD;
11596 StringRef UniqueDeclName;
11597 LValue IVLVal;
11598 llvm::Function *FoundFn;
11599 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11600 Checker.getFoundData();
11601 if (FoundFn != CGF.CurFn) {
11602 // Special codegen for inner parallel regions.
11603 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11604 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11605 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11606 "Lastprivate conditional is not found in outer region.");
11607 QualType StructTy = std::get<0>(It->getSecond());
11608 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11609 LValue PrivLVal = CGF.EmitLValue(FoundE);
11610 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11611 PrivLVal.getAddress(),
11612 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11613 CGF.ConvertTypeForMem(StructTy));
11614 LValue BaseLVal =
11615 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11616 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11617 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11618 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11619 FiredLVal, llvm::AtomicOrdering::Unordered,
11620 /*IsVolatile=*/true, /*isInit=*/false);
11621 return;
11622 }
11623
11624 // Private address of the lastprivate conditional in the current context.
11625 // priv_a
11626 LValue LVal = CGF.EmitLValue(FoundE);
11627 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11628 FoundE->getExprLoc());
11629}
11630
11631void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11632 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11633 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11634 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11635 return;
11636 auto Range = llvm::reverse(LastprivateConditionalStack);
11637 auto It = llvm::find_if(
11638 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11639 if (It == Range.end() || It->Fn != CGF.CurFn)
11640 return;
11641 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11642 assert(LPCI != LastprivateConditionalToTypes.end() &&
11643 "Lastprivates must be registered already.");
11644 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11645 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11646 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11647 for (const auto &Pair : It->DeclToUniqueName) {
11648 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11649 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11650 continue;
11651 auto I = LPCI->getSecond().find(Pair.first);
11652 assert(I != LPCI->getSecond().end() &&
11653 "Lastprivate must be registered already.");
11654 // bool Cmp = priv_a.Fired != 0;
11655 LValue BaseLVal = std::get<3>(I->getSecond());
11656 LValue FiredLVal =
11657 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11658 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11659 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11660 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11661 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11662 // if (Cmp) {
11663 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11664 CGF.EmitBlock(ThenBB);
11665 Address Addr = CGF.GetAddrOfLocalVar(VD);
11666 LValue LVal;
11667 if (VD->getType()->isReferenceType())
11668 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11669 AlignmentSource::Decl);
11670 else
11671 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11672 AlignmentSource::Decl);
11673 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11674 D.getBeginLoc());
11675 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11676 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11677 // }
11678 }
11679}
11680
11681void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11682 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11683 SourceLocation Loc) {
11684 if (CGF.getLangOpts().OpenMP < 50)
11685 return;
11686 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11687 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11688 "Unknown lastprivate conditional variable.");
11689 StringRef UniqueName = It->second;
11690 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11691 // The variable was not updated in the region - exit.
11692 if (!GV)
11693 return;
11694 LValue LPLVal = CGF.MakeRawAddrLValue(
11695 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11696 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11697 CGF.EmitStoreOfScalar(Res, PrivLVal);
11698}
11699
11700llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11701 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11702 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11703 const RegionCodeGenTy &CodeGen) {
11704 llvm_unreachable("Not supported in SIMD-only mode");
11705}
11706
11707llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11708 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11709 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11710 const RegionCodeGenTy &CodeGen) {
11711 llvm_unreachable("Not supported in SIMD-only mode");
11712}
11713
11714llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11715 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11716 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11717 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11718 bool Tied, unsigned &NumberOfParts) {
11719 llvm_unreachable("Not supported in SIMD-only mode");
11720}
11721
11722void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11723 SourceLocation Loc,
11724 llvm::Function *OutlinedFn,
11725 ArrayRef<llvm::Value *> CapturedVars,
11726 const Expr *IfCond,
11727 llvm::Value *NumThreads) {
11728 llvm_unreachable("Not supported in SIMD-only mode");
11729}
11730
11731void CGOpenMPSIMDRuntime::emitCriticalRegion(
11732 CodeGenFunction &CGF, StringRef CriticalName,
11733 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11734 const Expr *Hint) {
11735 llvm_unreachable("Not supported in SIMD-only mode");
11736}
11737
11738void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11739 const RegionCodeGenTy &MasterOpGen,
11740 SourceLocation Loc) {
11741 llvm_unreachable("Not supported in SIMD-only mode");
11742}
11743
11744void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11745 const RegionCodeGenTy &MasterOpGen,
11746 SourceLocation Loc,
11747 const Expr *Filter) {
11748 llvm_unreachable("Not supported in SIMD-only mode");
11749}
11750
11751void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11752 SourceLocation Loc) {
11753 llvm_unreachable("Not supported in SIMD-only mode");
11754}
11755
11756void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11757 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11758 SourceLocation Loc) {
11759 llvm_unreachable("Not supported in SIMD-only mode");
11760}
11761
11762void CGOpenMPSIMDRuntime::emitSingleRegion(
11763 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11764 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11765 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11766 ArrayRef<const Expr *> AssignmentOps) {
11767 llvm_unreachable("Not supported in SIMD-only mode");
11768}
11769
11770void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11771 const RegionCodeGenTy &OrderedOpGen,
11772 SourceLocation Loc,
11773 bool IsThreads) {
11774 llvm_unreachable("Not supported in SIMD-only mode");
11775}
11776
11777void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11778 SourceLocation Loc,
11779 OpenMPDirectiveKind Kind,
11780 bool EmitChecks,
11781 bool ForceSimpleCall) {
11782 llvm_unreachable("Not supported in SIMD-only mode");
11783}
11784
11785void CGOpenMPSIMDRuntime::emitForDispatchInit(
11786 CodeGenFunction &CGF, SourceLocation Loc,
11787 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11788 bool Ordered, const DispatchRTInput &DispatchValues) {
11789 llvm_unreachable("Not supported in SIMD-only mode");
11790}
11791
11792void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
11793 SourceLocation Loc) {
11794 llvm_unreachable("Not supported in SIMD-only mode");
11795}
11796
11797void CGOpenMPSIMDRuntime::emitForStaticInit(
11798 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11799 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11800 llvm_unreachable("Not supported in SIMD-only mode");
11801}
11802
11803void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11804 CodeGenFunction &CGF, SourceLocation Loc,
11805 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11806 llvm_unreachable("Not supported in SIMD-only mode");
11807}
11808
11809void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11810 SourceLocation Loc,
11811 unsigned IVSize,
11812 bool IVSigned) {
11813 llvm_unreachable("Not supported in SIMD-only mode");
11814}
11815
11816void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
11817 SourceLocation Loc,
11818 OpenMPDirectiveKind DKind) {
11819 llvm_unreachable("Not supported in SIMD-only mode");
11820}
11821
11822llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
11823 SourceLocation Loc,
11824 unsigned IVSize, bool IVSigned,
11825 Address IL, Address LB,
11826 Address UB, Address ST) {
11827 llvm_unreachable("Not supported in SIMD-only mode");
11828}
11829
11830void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11831 llvm::Value *NumThreads,
11832 SourceLocation Loc) {
11833 llvm_unreachable("Not supported in SIMD-only mode");
11834}
11835
11836void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
11837 ProcBindKind ProcBind,
11838 SourceLocation Loc) {
11839 llvm_unreachable("Not supported in SIMD-only mode");
11840}
11841
11842Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
11843 const VarDecl *VD,
11844 Address VDAddr,
11845 SourceLocation Loc) {
11846 llvm_unreachable("Not supported in SIMD-only mode");
11847}
11848
11849llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
11850 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11851 CodeGenFunction *CGF) {
11852 llvm_unreachable("Not supported in SIMD-only mode");
11853}
11854
11855Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
11856 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11857 llvm_unreachable("Not supported in SIMD-only mode");
11858}
11859
11860void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
11861 ArrayRef<const Expr *> Vars,
11862 SourceLocation Loc,
11863 llvm::AtomicOrdering AO) {
11864 llvm_unreachable("Not supported in SIMD-only mode");
11865}
11866
11867void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
11868 const OMPExecutableDirective &D,
11869 llvm::Function *TaskFunction,
11870 QualType SharedsTy, Address Shareds,
11871 const Expr *IfCond,
11872 const OMPTaskDataTy &Data) {
11873 llvm_unreachable("Not supported in SIMD-only mode");
11874}
11875
11876void CGOpenMPSIMDRuntime::emitTaskLoopCall(
11877 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11878 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11879 const Expr *IfCond, const OMPTaskDataTy &Data) {
11880 llvm_unreachable("Not supported in SIMD-only mode");
11881}
11882
11883void CGOpenMPSIMDRuntime::emitReduction(
11884 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
11885 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
11886 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11887 assert(Options.SimpleReduction && "Only simple reduction is expected.");
11888 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11889 ReductionOps, Options);
11890}
11891
11892llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
11893 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
11894 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11895 llvm_unreachable("Not supported in SIMD-only mode");
11896}
11897
11898void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
11899 SourceLocation Loc,
11900 bool IsWorksharingReduction) {
11901 llvm_unreachable("Not supported in SIMD-only mode");
11902}
11903
11904void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
11905 SourceLocation Loc,
11906 ReductionCodeGen &RCG,
11907 unsigned N) {
11908 llvm_unreachable("Not supported in SIMD-only mode");
11909}
11910
11911Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
11912 SourceLocation Loc,
11913 llvm::Value *ReductionsPtr,
11914 LValue SharedLVal) {
11915 llvm_unreachable("Not supported in SIMD-only mode");
11916}
11917
11918void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
11919 SourceLocation Loc,
11920 const OMPTaskDataTy &Data) {
11921 llvm_unreachable("Not supported in SIMD-only mode");
11922}
11923
11924void CGOpenMPSIMDRuntime::emitCancellationPointCall(
11925 CodeGenFunction &CGF, SourceLocation Loc,
11926 OpenMPDirectiveKind CancelRegion) {
11927 llvm_unreachable("Not supported in SIMD-only mode");
11928}
11929
11930void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
11931 SourceLocation Loc, const Expr *IfCond,
11932 OpenMPDirectiveKind CancelRegion) {
11933 llvm_unreachable("Not supported in SIMD-only mode");
11934}
11935
11936void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
11937 const OMPExecutableDirective &D, StringRef ParentName,
11938 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11939 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11940 llvm_unreachable("Not supported in SIMD-only mode");
11941}
11942
11943void CGOpenMPSIMDRuntime::emitTargetCall(
11944 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11945 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11946 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11947 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11948 const OMPLoopDirective &D)>
11949 SizeEmitter) {
11950 llvm_unreachable("Not supported in SIMD-only mode");
11951}
11952
11953bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
11954 llvm_unreachable("Not supported in SIMD-only mode");
11955}
11956
11957bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11958 llvm_unreachable("Not supported in SIMD-only mode");
11959}
11960
11961bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
11962 return false;
11963}
11964
11965void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
11966 const OMPExecutableDirective &D,
11967 SourceLocation Loc,
11968 llvm::Function *OutlinedFn,
11969 ArrayRef<llvm::Value *> CapturedVars) {
11970 llvm_unreachable("Not supported in SIMD-only mode");
11971}
11972
11973void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11974 const Expr *NumTeams,
11975 const Expr *ThreadLimit,
11976 SourceLocation Loc) {
11977 llvm_unreachable("Not supported in SIMD-only mode");
11978}
11979
11980void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11981 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11982 const Expr *Device, const RegionCodeGenTy &CodeGen,
11983 CGOpenMPRuntime::TargetDataInfo &Info) {
11984 llvm_unreachable("Not supported in SIMD-only mode");
11985}
11986
11987void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11988 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11989 const Expr *Device) {
11990 llvm_unreachable("Not supported in SIMD-only mode");
11991}
11992
11993void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11994 const OMPLoopDirective &D,
11995 ArrayRef<Expr *> NumIterations) {
11996 llvm_unreachable("Not supported in SIMD-only mode");
11997}
11998
11999void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12000 const OMPDependClause *C) {
12001 llvm_unreachable("Not supported in SIMD-only mode");
12002}
12003
12004void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12005 const OMPDoacrossClause *C) {
12006 llvm_unreachable("Not supported in SIMD-only mode");
12007}
12008
12009const VarDecl *
12010CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12011 const VarDecl *NativeParam) const {
12012 llvm_unreachable("Not supported in SIMD-only mode");
12013}
12014
12015Address
12016CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12017 const VarDecl *NativeParam,
12018 const VarDecl *TargetParam) const {
12019 llvm_unreachable("Not supported in SIMD-only mode");
12020}
#define V(N, I)
Definition: ASTContext.h:3443
StringRef P
#define SM(sm)
Definition: Cuda.cpp:84
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in UpperBound), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_task_affinity_info type, if it is not built yet.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
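For illustration, under the vector function ABIs this mangling yields names such as _ZGVnN4v_foo for an AArch64 AdvSIMD variant with simdlen(4): 'n' selects the AdvSIMD ISA, 'N' means unmasked (notinbranch), '4' is the vector length, and 'v' marks a vector parameter. (foo here is a hypothetical function name used only as an example.)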
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
SourceManager & getSourceManager()
Definition: ASTContext.h:741
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2915
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2732
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:834
CanQualType BoolTy
Definition: ASTContext.h:1161
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2918
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:210
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:5177
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3577
Attr - This represents one attribute.
Definition: Attr.h:43
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2553
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2817
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2078
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2204
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2228
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:620
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1030
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1735
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:614
base_class_range vbases()
Definition: DeclCXX.h:637
capture_const_range captures() const
Definition: DeclCXX.h:1109
ctor_range ctors() const
Definition: DeclCXX.h:682
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:2069
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:348
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3797
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3831
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1312
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3837
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3825
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3828
This captures a statement into a function.
Definition: Stmt.h:3784
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:3935
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:3905
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3888
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1438
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:3930
capture_range captures()
Definition: Stmt.h:3922
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:259
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
bool isValid() const
Definition: Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:896
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:903
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:913
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:156
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:203
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:241
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:278
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:113
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::FunctionType * Kmpc_MicroTy
The type for a microtask which gets passed to __kmpc_fork_call().
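For reference, a minimal sketch of the microtask type along the lines of the declaration in the LLVM OpenMP runtime's kmp.h (kmp_int32 stands in for the runtime's 32-bit integer; the trailing varargs carry the outlined region's captured variables):

  typedef int kmp_int32; // runtime 32-bit integer, simplified here
  // Microtask signature passed to __kmpc_fork_call: the global and bound
  // thread ids come first, followed by the captured variables as varargs.
  typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);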
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
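A sketch of the 32-bit signed variant of this entry point as exposed by libomp (ident_t is the runtime's source-location descriptor; the call returns nonzero while another chunk of iterations remains):

  struct ident_t; // opaque runtime source-location struct
  typedef int kmp_int32;
  // Fetch the next chunk's bounds/stride for a dynamically scheduled loop.
  kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, kmp_int32 *p_lb,
                                   kmp_int32 *p_ub, kmp_int32 *p_st);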
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true if it was marked alrea...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
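The referenced runtime entry point in full, as a sketch (types as in libomp's kmp.h):

  // Request a thread count for the next parallel region on this thread.
  void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_threads);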
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /**< pointer to block of pointers to shared vars */ k...
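The full layout behind this typedef, reconstructed from the truncated comment above (kmp_cmplrdata_t is the runtime union that holds either a destructor thunk or a priority):

  typedef struct kmp_task {
    void *shareds;               // pointer to block of pointers to shared vars
    kmp_routine_entry_t routine; // pointer to routine to call for executing task
    kmp_int32 part_id;           // part id for the task
    kmp_cmplrdata_t data1;       // optional additions: destructors and priority
    kmp_cmplrdata_t data2;       // (destructors processed first, priority second)
  } kmp_task_t;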
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
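The layout behind this typedef, reconstructed from the truncated comment above:

  typedef struct kmp_task_affinity_info {
    kmp_intptr_t base_addr;
    size_t len;
    struct {
      bool flag1 : 1;
      bool flag2 : 1;
      kmp_int32 reserved : 30;
    } flags;
  } kmp_task_affinity_info_t;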
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
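The referenced runtime entry point in full, as a sketch:

  // Request team and per-team thread counts for the next teams region.
  void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_teams, kmp_int32 num_threads);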
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
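The referenced runtime entry point in full, as a sketch (proc_bind receives a value of the runtime's proc-bind enumeration):

  // Record the requested thread affinity policy for the next parallel region.
  void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind);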
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
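The struct reconstructed from the truncated comment above:

  struct kmp_dim { // loop bounds info, cast to kmp_int64
    kmp_int64 lo; // lower
    kmp_int64 up; // upper
    kmp_int64 st; // stride
  };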
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
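The layout behind this typedef, reconstructed from the truncated comment above:

  typedef struct kmp_depend_info {
    kmp_intptr_t base_addr;
    size_t len;
    struct {
      bool in : 1;
      bool out : 1;
    } flags;
  } kmp_depend_info_t;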
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CGCapturedStmtInfo * CapturedStmtInfo
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents an lvalue, this method emits the address ...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
llvm::Type * ConvertTypeForMem(QualType T)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPt - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
JumpDest ReturnBlock
ReturnBlock - Unified return block.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns the calculated size of the specified type.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
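Taken together, createBasicBlock, EmitBranchOnBoolExpr, EmitBlock, and EmitBranch support the usual diamond control-flow pattern; a hedged sketch (Cond and the block names are illustrative):
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp_if.then");
llvm::BasicBlock *ElseBB = CGF.createBasicBlock("omp_if.else");
llvm::BasicBlock *EndBB = CGF.createBasicBlock("omp_if.end");
CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ElseBB, /*TrueCount=*/0);
CGF.EmitBlock(ThenBB);
// ... emit the true branch ...
CGF.EmitBranch(EndBB);
CGF.EmitBlock(ElseBB);
// ... emit the false branch ...
CGF.EmitBlock(EndBB, /*IsFinished=*/true);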
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above, except that the pointer is passed as a raw llvm::Value rather than an Address.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
void EmitAutoVarCleanups(const AutoVarEmission &emission)
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
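A typical use pairs this with the OpenMPIRBuilder to fetch a declared runtime entry point; a sketch assuming we are inside a CGOpenMPRuntime member where OMPBuilder and the emitUpdateLocation/getThreadID helpers are available:
// Sketch: emit a call to __kmpc_barrier through the cached runtime declaration.
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                        CGM.getModule(), OMPRTL___kmpc_barrier),
                    Args);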
CodeGenTypes & getTypes() const
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
LValue EmitStringLiteralLValue(const StringLiteral *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with a possibly captured variable.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:336
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:709
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:682
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2822
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:443
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:721
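These arrange*/GetFunctionType entries combine when synthesizing an internal helper function; a sketch under the assumption that CGM and Loc are available (the ".omp.helper" name is hypothetical):
// Sketch: create and emit a 'void ()' helper function.
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn =
    CGM.CreateGlobalInitOrCleanUpFunction(FTy, ".omp.helper", FI, Loc);
CodeGenFunction CGF(CGM);
FunctionArgList Args;
CGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, Loc, Loc);
// ... emit the helper body here ...
CGF.FinishFunction();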
A specialization of Address that requires the address to be an LLVM Constant.
Definition: Address.h:294
static ConstantAddress invalid()
Definition: Address.h:302
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:631
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:243
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
CharUnits getAlignment() const
Definition: CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition: CGValue.h:338
Address getAddress() const
Definition: CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:346
QualType getType() const
Definition: CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:335
A basic class for pre- and post-actions used in advanced codegen sequences for an OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:77
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
bool isValid() const
Definition: Address.h:62
Class intended to support codegen for all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses the initializer from a 'declare reduction' cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits the lvalues for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
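A hedged sketch of how ReductionCodeGen is typically driven (the Shareds/Privates/ReductionOps arrays come from the reduction clause, and PrivateAddr is an assumed address of the private copy; all names are assumptions):
ReductionCodeGen RCG(Shareds, Shareds, Privates, ReductionOps);
for (unsigned N = 0, E = Shareds.size(); N < E; ++N) {
  RCG.emitSharedOrigLValue(CGF, N); // materialize shared/original lvalues
  RCG.emitAggregateType(CGF, N);    // compute sizes for VLA-typed items
  if (RCG.needCleanups(N))
    RCG.emitCleanups(CGF, N, PrivateAddr); // destroy the private copy
}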
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
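The RegionCodeGenTy/PrePostActionTy pair is the region-emission closure pattern used throughout this file; a minimal sketch, where Body is an assumed 'const Stmt *' for the region:
auto &&CodeGen = [Body](CodeGenFunction &CGF, PrePostActionTy &Action) {
  Action.Enter(CGF);   // run any pre-action installed via setAction
  CGF.EmitStmt(Body);  // emit the actual region body
};
RegionCodeGenTy RCG(CodeGen);
RCG(CGF); // operator() wraps the closure with the configured pre/post action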
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:76
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:195
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1435
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1768
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1265
ValueDecl * getDecl()
Definition: Expr.h:1333
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
SourceLocation getEndLoc() const LLVM_READONLY
Definition: DeclBase.h:438
T * getAttr() const
Definition: DeclBase.h:576
bool hasAttrs() const
Definition: DeclBase.h:521
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:520
void addAttr(Attr *A)
Definition: DeclBase.cpp:1010
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:1082
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:562
SourceLocation getLocation() const
Definition: DeclBase.h:442
DeclContext * getDeclContext()
Definition: DeclBase.h:451
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:434
AttrVec & getAttrs()
Definition: DeclBase.h:527
bool hasAttr() const
Definition: DeclBase.h:580
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:967
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:786
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:896
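These two calls are how unsupported constructs are reported during codegen; a sketch (the message text and the "reduction" argument are illustrative):
unsigned DiagID = CGM.getDiags().getCustomDiagID(
    DiagnosticsEngine::Error, "cannot generate code for '%0' yet");
CGM.getDiags().Report(Loc, DiagID) << "reduction";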
The return type of classify().
Definition: Expr.h:330
This represents one expression.
Definition: Expr.h:110
bool isGLValue() const
Definition: Expr.h:280
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:671
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:669
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3095
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3090
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3587
bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
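A hedged sketch of the fold-if-constant pattern this enables (NumThreads is an assumed clause expression and NumThreadsVal an assumed llvm::Value*):
if (std::optional<llvm::APSInt> Cst =
        NumThreads->getIntegerConstantExpr(CGF.getContext()))
  // The clause argument is a compile-time constant; emit it directly.
  NumThreadsVal = llvm::ConstantInt::get(CGF.IntTy, Cst->getZExtValue());
else
  // Otherwise fall back to evaluating the expression at run time.
  NumThreadsVal = CGF.EmitScalarExpr(NumThreads);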
QualType getType() const
Definition: Expr.h:142
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3951
Represents a member of a struct/union/class.
Definition: Decl.h:3033
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4555
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
QualType getReturnType() const
Definition: Decl.h:2720
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2649
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3623
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3702
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5402
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:973
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:577
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:573
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3319
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
bool isExternallyVisible() const
Definition: Decl.h:412
This represents clause 'affinity' in the '#pragma omp task'-based directives.
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing a single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing a single OpenMP executable directive.
Definition: StmtOpenMP.h:266
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:446
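A sketch of the clause-iteration idiom (S is an assumed OMPExecutableDirective; OMPNumThreadsClause is one concrete choice, any clause class works):
for (const auto *C : S.getClausesOfKind<OMPNumThreadsClause>()) {
  const Expr *NumThreads = C->getNumThreads();
  // ... emit code that honors the clause ...
  (void)NumThreads;
}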
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:612
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:681
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:151
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1004
This represents clause 'map' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:761
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:417
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1173
Represents a parameter to a function.
Definition: Decl.h:1725
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3198
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition: Type.h:929
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: Type.h:1167
QualType withRestrict() const
Definition: Type.h:1170
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:996
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:7931
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:7971
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:8134
QualType getCanonicalType() const
Definition: Type.h:7983
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: Type.h:1531
Represents a struct/union/class.
Definition: Decl.h:4148
field_iterator field_end() const
Definition: Decl.h:4357
field_range fields() const
Definition: Decl.h:4354
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5104
bool field_empty() const
Definition: Decl.h:4362
field_iterator field_begin() const
Definition: Decl.cpp:5092
RecordDecl * getDecl() const
Definition: Type.h:6082
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:203
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:225
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:3439
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
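This is the basis of the runtime's source-location strings; a hedged sketch of rendering a location (SM, OS, and FnName are assumed context; the ";file;func;line;col;;" shape mirrors the ident_t convention):
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
if (PLoc.isValid())
  OS << ";" << PLoc.getFilename() << ";" << FnName << ";" << PLoc.getLine()
     << ";" << PLoc.getColumn() << ";;";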
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:333
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:204
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:345
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4760
bool isUnion() const
Definition: Decl.h:3770
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1583
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1493
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isVoidType() const
Definition: Type.h:8510
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration type whose underlying ty...
Definition: Type.cpp:2201
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition: Type.h:8688
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isArrayType() const
Definition: Type.h:8258
bool isPointerType() const
Definition: Type.h:8186
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8550
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8800
bool isReferenceType() const
Definition: Type.h:8204
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isLValueReferenceType() const
Definition: Type.h:8208
QualType getCanonicalTypeInternal() const
Definition: Type.h:2989
const RecordType * getAsStructureType() const
Definition: Type.cpp:754
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:8681
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2724
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:8786
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
bool isAnyPointerType() const
Definition: Type.h:8194
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8731
bool isRecordType() const
Definition: Type.h:8286
bool isUnionType() const
Definition: Type.cpp:704
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1924
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
Represents a variable declaration or definition.
Definition: Decl.h:882
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2246
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2355
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1168
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1135
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1246
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2364
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1213
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1309
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3808
Expr * getSizeExpr() const
Definition: Type.h:3827
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
@ NotKnownNonNull
Definition: Address.h:33
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs a task-based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
if (T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast<Expr *>(T->getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:25
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:40
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:104
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:139
const FunctionProtoType * T
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:132
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:80
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:35
@ AS_public
Definition: Specifiers.h:124
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:26
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, which is necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
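A hedged sketch of how these fields are bundled for the static-init runtime call (RT is the assumed CGOpenMPRuntime instance, S the loop directive, and ScheduleKind comes from the schedule clause; the constructor order shown is an assumption based on the field list above):
CGOpenMPRuntime::StaticRTInput StaticInit(
    IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
RT.emitForStaticInit(CGF, S.getBeginLoc(), S.getDirectiveKind(),
                     ScheduleKind, StaticInit);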
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
Extra information about a function prototype.
Definition: Type.h:5187
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:111
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition: ExprOpenMP.h:121
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:119
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:113
Data for list of allocators.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:180
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:183
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:181
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57