clang 20.0.0git
DeviceOffload.cpp
Go to the documentation of this file.
1//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements offloading to CUDA devices.
10//
11//===----------------------------------------------------------------------===//
12
13#include "DeviceOffload.h"
14
19
20#include "llvm/IR/LegacyPassManager.h"
21#include "llvm/IR/Module.h"
22#include "llvm/MC/TargetRegistry.h"
23#include "llvm/Target/TargetMachine.h"
24
25namespace clang {
26
28 std::unique_ptr<CompilerInstance> DeviceInstance,
29 CompilerInstance &HostInstance,
31 llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
32 : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
33 CodeGenOpts(HostInstance.getCodeGenOpts()),
34 TargetOpts(HostInstance.getTargetOpts()) {
35 if (Err)
36 return;
37 DeviceCI = std::move(DeviceInstance);
38 StringRef Arch = TargetOpts.CPU;
39 if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
40 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
41 "Invalid CUDA architecture",
42 llvm::inconvertibleErrorCode()));
43 return;
44 }
45}
46
48IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
49 auto PTU = IncrementalParser::Parse(Input);
50 if (!PTU)
51 return PTU.takeError();
52
53 auto PTX = GeneratePTX();
54 if (!PTX)
55 return PTX.takeError();
56
57 auto Err = GenerateFatbinary();
58 if (Err)
59 return std::move(Err);
60
61 std::string FatbinFileName =
62 "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
63 VFS->addFile(FatbinFileName, 0,
64 llvm::MemoryBuffer::getMemBuffer(
65 llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
66 "", false));
67
68 CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
69
70 FatbinContent.clear();
71
72 return PTU;
73}
74
76 auto &PTU = PTUs.back();
77 std::string Error;
78
79 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
80 PTU.TheModule->getTargetTriple(), Error);
81 if (!Target)
82 return llvm::make_error<llvm::StringError>(std::move(Error),
83 std::error_code());
84 llvm::TargetOptions TO = llvm::TargetOptions();
85 llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
86 PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
87 llvm::Reloc::Model::PIC_);
88 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
89
90 PTXCode.clear();
91 llvm::raw_svector_ostream dest(PTXCode);
92
93 llvm::legacy::PassManager PM;
94 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
95 llvm::CodeGenFileType::AssemblyFile)) {
96 return llvm::make_error<llvm::StringError>(
97 "NVPTX backend cannot produce PTX code.",
98 llvm::inconvertibleErrorCode());
99 }
100
101 if (!PM.run(*PTU.TheModule))
102 return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
103 llvm::inconvertibleErrorCode());
104
105 PTXCode += '\0';
106 while (PTXCode.size() % 8)
107 PTXCode += '\0';
108 return PTXCode.str();
109}
110
// Bit flags stored in FatBinInnerHeader::Flags. Kept as an unscoped enum so
// that the enumerators can be OR-ed together and passed as a plain integer.
enum FatBinFlags {
  AddressSize64 = 1 << 0, // 0x01
  HasDebugInfo = 1 << 1,  // 0x02
  ProducerCuda = 1 << 2,  // 0x04
  HostLinux = 1 << 4,     // 0x10
  HostMac = 1 << 5,       // 0x20
  HostWindows = 1 << 6    // 0x40
};
120
// Header that precedes the PTX payload inside the fatbinary. The struct is
// copied into the image byte for byte, so the member order and sizes are the
// layout; the trailing comments give each field's byte offset. Fields whose
// value never varies carry it as a default member initializer; the
// constructor fills in only the sizes, the architecture and the flags.
struct FatBinInnerHeader {
  uint16_t Kind = 1;             // 0x00, 1 means PTX
  uint16_t unknown02 = 0x0101;   // 0x02
  uint32_t HeaderSize;           // 0x04
  uint32_t DataSize;             // 0x08
  uint32_t unknown0c = 0;        // 0x0c
  uint32_t CompressedSize = 0;   // 0x10
  uint32_t SubHeaderSize;        // 0x14
  uint16_t VersionMinor = 2;     // 0x18
  uint16_t VersionMajor = 4;     // 0x1a
  uint32_t CudaArch;             // 0x1c
  uint32_t unknown20 = 0;        // 0x20
  uint32_t unknown24 = 0;        // 0x24
  uint32_t Flags;                // 0x28
  uint32_t unknown2c = 0;        // 0x2c
  uint32_t unknown30 = 0;        // 0x30
  uint32_t unknown34 = 0;        // 0x34
  uint32_t UncompressedSize = 0; // 0x38
  uint32_t unknown3c = 0;        // 0x3c
  uint32_t unknown40 = 0;        // 0x40
  uint32_t unknown44 = 0;        // 0x44

  // HeaderSize is the size of this struct; SubHeaderSize is that size less 8.
  FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
      : HeaderSize(sizeof(FatBinInnerHeader)), DataSize(DataSize),
        SubHeaderSize(sizeof(FatBinInnerHeader) - 8), CudaArch(CudaArch),
        Flags(Flags) {}
};
150
// Outermost header of the fatbinary image. It is copied into the image byte
// for byte, so the member order and sizes are the layout; the trailing
// comments give each field's byte offset.
struct FatBinHeader {
  uint32_t Magic;      // 0x00
  uint16_t Version;    // 0x04
  uint16_t HeaderSize; // 0x06
  uint32_t DataSize;   // 0x08
  uint32_t unknown0c;  // 0x0c

  // DataSize is the number of bytes that follow this header. Explicit so a
  // bare integer can never silently convert into a header.
  explicit FatBinHeader(uint32_t DataSize)
      : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
        DataSize(DataSize), unknown0c(0) {}
};
// The struct is serialized using its own size; catch any accidental layout
// change (added member, padding) at compile time.
static_assert(sizeof(FatBinHeader) == 16,
              "FatBinHeader must stay 16 bytes with no padding");
162
163 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
164 FatbinContent.append((char *)&OuterHeader,
165 ((char *)&OuterHeader) + OuterHeader.HeaderSize);
166
167 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
168 FatBinFlags::AddressSize64 |
169 FatBinFlags::HostLinux);
170 FatbinContent.append((char *)&InnerHeader,
171 ((char *)&InnerHeader) + InnerHeader.HeaderSize);
172
173 FatbinContent.append(PTXCode.begin(), PTXCode.end());
174
175 return llvm::Error::success();
176}
177
179
180} // namespace clang
enum clang::sema::@1718::IndirectLocalPathEntry::EntryKind Kind
llvm::MachO::Target Target
Definition: MachO.h:51
Defines the clang::TargetOptions class.
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
CompilerInstance - Helper class for managing a single instance of the Clang compiler.
llvm::SmallVector< char, 1024 > FatbinContent
Definition: DeviceOffload.h:50
llvm::SmallString< 1024 > PTXCode
Definition: DeviceOffload.h:49
IncrementalCUDADeviceParser(std::unique_ptr< CompilerInstance > DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS, llvm::Error &Err, const std::list< PartialTranslationUnit > &PTUs)
std::unique_ptr< CompilerInstance > DeviceCI
Definition: DeviceOffload.h:47
llvm::Expected< TranslationUnitDecl * > Parse(llvm::StringRef Input) override
Parses incremental input by creating an in-memory file.
llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS
Definition: DeviceOffload.h:51
llvm::Expected< llvm::StringRef > GeneratePTX()
const TargetOptions & TargetOpts
Definition: DeviceOffload.h:53
Provides support for incremental compilation.
virtual llvm::Expected< TranslationUnitDecl * > Parse(llvm::StringRef Input)
Parses incremental input by creating an in-memory file.
std::string CPU
If given, the name of the target CPU to generate code for.
Definition: TargetOptions.h:36
The JSON file list parser is used to communicate input to InstallAPI.