#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
    std::unique_ptr<CompilerInstance> DeviceInstance,
    CompilerInstance &HostInstance,
    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
    llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
    : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(VFS),
      CodeGenOpts(HostInstance.getCodeGenOpts()),
      TargetOpts(HostInstance.getTargetOpts()) {
  if (Err)
    return;

  DeviceCI = std::move(DeviceInstance);

  // The device target CPU must name a CUDA architecture such as "sm_70";
  // the numeric suffix is kept in SMVersion for the fatbin header.
  llvm::StringRef Arch = TargetOpts.CPU;
  if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
    Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
                                               "Invalid CUDA architecture",
                                               llvm::inconvertibleErrorCode()));
    return;
  }
}
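A note on the check above: llvm::StringRef::getAsInteger() returns true on failure, so the condition reads as "not an sm_ prefix, or the suffix is not a decimal number". A small standalone sketch of the same idiom (parseSMVersion is a made-up helper name, not part of this file):

#include "llvm/ADT/StringRef.h"
#include <cassert>

// Returns true and fills SMVersion when Arch looks like "sm_<decimal>".
static bool parseSMVersion(llvm::StringRef Arch, unsigned &SMVersion) {
  if (!Arch.starts_with("sm_"))
    return false;
  // getAsInteger() returns true on *failure*, hence the negation.
  return !Arch.substr(3).getAsInteger(10, SMVersion);
}

int main() {
  unsigned V = 0;
  assert(parseSMVersion("sm_86", V) && V == 86);
  assert(!parseSMVersion("gfx90a", V));
  assert(!parseSMVersion("sm_abc", V));
  return 0;
}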
llvm::Expected<TranslationUnitDecl *>
IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
  auto PTU = IncrementalParser::Parse(Input);
  if (!PTU)
    return PTU.takeError();

  auto PTX = GeneratePTX();
  if (!PTX)
    return PTX.takeError();

  auto Err = GenerateFatbinary();
  if (Err)
    return std::move(Err);

  // Expose the freshly built fatbin to host-side CodeGen through the
  // in-memory VFS instead of writing it to disk.
  std::string FatbinFileName =
      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
  VFS->addFile(FatbinFileName, 0,
               llvm::MemoryBuffer::getMemBuffer(
                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
                   "", false));

  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;

  FatbinContent.clear();

  return PTU;
}
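Parse() hands the finished fatbin to CodeGen through an in-memory virtual file system rather than a temporary file. A minimal standalone sketch of that registration pattern, with a made-up payload and path standing in for FatbinContent and FatbinFileName:

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <cassert>

int main() {
  // Hypothetical bytes standing in for FatbinContent.
  const char Payload[] = "fake-fatbin-bytes";

  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();

  // Same pattern as Parse(): wrap the existing bytes without copying
  // (RequiresNullTerminator = false) and register them under a synthetic
  // absolute path that a consumer can later open by name.
  FS->addFile("/incr_module_1.fatbin", /*ModificationTime=*/0,
              llvm::MemoryBuffer::getMemBuffer(
                  llvm::StringRef(Payload, sizeof(Payload) - 1),
                  /*BufferName=*/"", /*RequiresNullTerminator=*/false));

  assert(FS->exists("/incr_module_1.fatbin"));
  return 0;
}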
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
  auto &PTU = PTUs.back();
  std::string Error;

  const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
      PTU.TheModule->getTargetTriple(), Error);
  if (!Target)
    return llvm::make_error<llvm::StringError>(std::move(Error),
                                               std::error_code());

  llvm::TargetOptions TO = llvm::TargetOptions();
  llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
      PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
      llvm::Reloc::Model::PIC_);
  PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());

  PTXCode.clear();
  llvm::raw_svector_ostream dest(PTXCode);

  // Emit textual PTX assembly for the last PTU into the PTXCode buffer.
  llvm::legacy::PassManager PM;
  if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
                                         llvm::CodeGenFileType::AssemblyFile)) {
    return llvm::make_error<llvm::StringError>(
        "NVPTX backend cannot produce PTX code.",
        llvm::inconvertibleErrorCode());
  }
  if (!PM.run(*PTU.TheModule))
    return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
                                               llvm::inconvertibleErrorCode());

  return PTXCode.str();
}
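GeneratePTX() assumes the NVPTX backend has already been registered with the TargetRegistry; the interpreter's setup does that elsewhere. The same lookupTarget/addPassesToEmitFile pattern can be exercised standalone against the native target, which avoids requiring an NVPTX-enabled build. Everything below is an illustrative sketch, not code from this file, and exact TargetMachine signatures vary slightly across LLVM versions:

#include "llvm/ADT/SmallString.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Host.h"
#include <memory>

int main() {
  // The registry only knows about backends that were initialized; the
  // interpreter does this for NVPTX, here we use the host target instead.
  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmPrinter();

  llvm::LLVMContext Ctx;
  llvm::Module M("demo", Ctx);
  std::string Triple = llvm::sys::getDefaultTargetTriple();
  M.setTargetTriple(Triple);

  std::string Error;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget(Triple, Error);
  if (!T) {
    llvm::errs() << Error << "\n";
    return 1;
  }

  llvm::TargetOptions TO;
  std::unique_ptr<llvm::TargetMachine> TM(
      T->createTargetMachine(Triple, /*CPU=*/"", /*Features=*/"", TO,
                             llvm::Reloc::PIC_));
  M.setDataLayout(TM->createDataLayout());

  // Same emission pattern as GeneratePTX(): a legacy pass manager writing
  // textual assembly into an in-memory buffer.
  llvm::SmallString<1024> Asm;
  llvm::raw_svector_ostream OS(Asm);
  llvm::legacy::PassManager PM;
  if (TM->addPassesToEmitFile(PM, OS, /*DwoOut=*/nullptr,
                              llvm::CodeGenFileType::AssemblyFile))
    return 1;
  PM.run(M);

  llvm::outs() << Asm;
  return 0;
}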
llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
  // Flag bits stored in the inner fatbin header.
  enum FatBinFlags {
    AddressSize64 = 0x01,
    HasDebugInfo = 0x02,
    ProducerCuda = 0x04,
    HostLinux = 0x10,
    HostMac = 0x20,
    HostWindows = 0x40
  };

  struct FatBinInnerHeader {
    uint16_t Kind;             // 0x00
    uint16_t unknown02;        // 0x02
    uint32_t HeaderSize;       // 0x04
    uint32_t DataSize;         // 0x08
    uint32_t unknown0c;        // 0x0c
    uint32_t CompressedSize;   // 0x10
    uint32_t SubHeaderSize;    // 0x14
    uint16_t VersionMinor;     // 0x18
    uint16_t VersionMajor;     // 0x1a
    uint32_t CudaArch;         // 0x1c
    uint32_t unknown20;        // 0x20
    uint32_t unknown24;        // 0x24
    uint32_t Flags;            // 0x28
    uint32_t unknown2c;        // 0x2c
    uint32_t unknown30;        // 0x30
    uint32_t unknown34;        // 0x34
    uint32_t UncompressedSize; // 0x38
    uint32_t unknown3c;        // 0x3c
    uint32_t unknown40;        // 0x40
    uint32_t unknown44;        // 0x44

    FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
        : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
          DataSize(DataSize), unknown0c(0), CompressedSize(0),
          SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
          CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
          unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
          unknown3c(0), unknown40(0), unknown44(0) {}
  };

  struct FatBinHeader {
    uint32_t Magic;      // 0x00
    uint16_t Version;    // 0x04
    uint16_t HeaderSize; // 0x06
    uint32_t DataSize;   // 0x08
    uint32_t unknown0c;  // 0x0c

    FatBinHeader(uint32_t DataSize)
        : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
          DataSize(DataSize), unknown0c(0) {}
  };

  // Serialize the outer header, then the inner header, then the PTX text.
  FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
  FatbinContent.append((char *)&OuterHeader,
                       ((char *)&OuterHeader) + OuterHeader.HeaderSize);

  FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
                                FatBinFlags::AddressSize64 |
                                    FatBinFlags::HostLinux);
  FatbinContent.append((char *)&InnerHeader,
                       ((char *)&InnerHeader) + InnerHeader.HeaderSize);

  FatbinContent.append(PTXCode.begin(), PTXCode.end());

  return llvm::Error::success();
}
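The buffer assembled above is laid out as a 16-byte outer header, a 0x48-byte inner header, then the PTX text, with the outer header's DataSize covering the inner header plus the PTX. A tiny arithmetic sketch of those offsets (sizes derived from the field lists above under a no-padding assumption, which holds because every field is naturally aligned; the PTX size is hypothetical):

#include <cstdio>

int main() {
  const unsigned OuterHeaderSize = 16;   // Magic .. unknown0c
  const unsigned InnerHeaderSize = 0x48; // Kind .. unknown44
  const unsigned PTXSize = 4096;         // hypothetical PTX text size

  // The outer header's DataSize covers the inner header plus the PTX.
  const unsigned OuterDataSize = InnerHeaderSize + PTXSize;

  std::printf("outer header at offset 0x%x\n", 0u);
  std::printf("inner header at offset 0x%x\n", OuterHeaderSize);
  std::printf("PTX text at offset     0x%x\n", OuterHeaderSize + InnerHeaderSize);
  std::printf("total fatbin: %u bytes (outer DataSize = %u)\n",
              OuterHeaderSize + OuterDataSize, OuterDataSize);
  return 0;
}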
Referenced declarations:

clang::IncrementalCUDADeviceParser
  IncrementalCUDADeviceParser(std::unique_ptr<CompilerInstance> DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS, llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
  ~IncrementalCUDADeviceParser()
  llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override
    Parses incremental input by creating an in-memory file.
  llvm::Expected<llvm::StringRef> GeneratePTX()
  llvm::Error GenerateFatbinary()
  std::unique_ptr<CompilerInstance> DeviceCI
  llvm::SmallString<1024> PTXCode
  llvm::SmallVector<char, 1024> FatbinContent
  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS
  CodeGenOptions &CodeGenOpts
  const TargetOptions &TargetOpts

clang::IncrementalParser
  Provides support for incremental compilation.
  virtual llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input)
    Parses incremental input by creating an in-memory file.

clang::CompilerInstance
  Helper class for managing a single instance of the Clang compiler.

clang::TargetOptions
  Defines the clang::TargetOptions class.
  std::string CPU
    If given, the name of the target CPU to generate code for.

clang::CodeGenOptions
  std::string CudaGpuBinaryFileName
    Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for incorporating them into host-side object file.
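For context on how this parser gets instantiated: clang-repl's CUDA mode builds a device CompilerInstance and a host CompilerInstance and hands both to the interpreter, which creates the IncrementalCUDADeviceParser internally. A rough sketch of that wiring using clang::IncrementalCompilerBuilder and clang::Interpreter::createWithCUDA; treat the exact method names and the SDK path as assumptions to verify against clang/Interpreter/Interpreter.h:

#include "clang/Interpreter/Interpreter.h"
#include "llvm/Support/Error.h"
#include <memory>

llvm::Expected<std::unique_ptr<clang::Interpreter>> makeCudaInterpreter() {
  clang::IncrementalCompilerBuilder Builder;
  Builder.SetCudaSDK("/usr/local/cuda"); // hypothetical CUDA SDK location
  Builder.SetOffloadArch("sm_70");       // must be an "sm_<N>" string (see the ctor above)

  auto DeviceCI = Builder.CreateCudaDevice();
  if (!DeviceCI)
    return DeviceCI.takeError();
  auto HostCI = Builder.CreateCudaHost();
  if (!HostCI)
    return HostCI.takeError();

  // createWithCUDA wires the device-side parser (this file) into the host
  // interpreter so each Parse() also produces a fatbin for the host TU.
  return clang::Interpreter::createWithCUDA(std::move(*HostCI),
                                            std::move(*DeviceCI));
}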