19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/LockFileManager.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/OnDiskHashTable.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/TimeProfiler.h"
32#include "llvm/Support/raw_ostream.h"
35using namespace serialization;
43 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
47 enum IndexRecordTypes {
72class IdentifierIndexReaderTrait {
74 typedef StringRef external_key_type;
75 typedef StringRef internal_key_type;
77 typedef unsigned hash_value_type;
78 typedef unsigned offset_type;
80 static bool EqualKey(
const internal_key_type& a,
const internal_key_type&
b) {
84 static hash_value_type
ComputeHash(
const internal_key_type& a) {
85 return llvm::djbHash(a);
88 static std::pair<unsigned, unsigned>
89 ReadKeyDataLength(
const unsigned char*& d) {
90 using namespace llvm::support;
91 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
92 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 return std::make_pair(KeyLen, DataLen);
/// Convert an external (caller-facing) key into the internal key form used
/// for hashing and comparison. Both key types are typedefs of StringRef in
/// this trait, so the argument is handed back unchanged, by reference.
96 static const internal_key_type&
97 GetInternalKey(
const external_key_type& x) {
return x; }
/// Convert an internal key back into the external (caller-facing) key form.
/// Both key types are typedefs of StringRef in this trait, so the argument
/// is handed back unchanged, by reference.
99 static const external_key_type&
100 GetExternalKey(
const internal_key_type& x) {
return x; }
102 static internal_key_type ReadKey(
const unsigned char* d,
unsigned n) {
103 return StringRef((
const char *)d, n);
106 static data_type ReadData(
const internal_key_type& k,
107 const unsigned char* d,
109 using namespace llvm::support;
112 while (DataLen > 0) {
113 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
114 Result.push_back(ID);
122typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
123 IdentifierIndexTable;
127GlobalModuleIndex::GlobalModuleIndex(
128 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
129 llvm::BitstreamCursor Cursor)
130 : Buffer(
std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
131 NumIdentifierLookupHits() {
132 auto Fail = [&](llvm::Error &&Err) {
133 report_fatal_error(
"Module index '" + Buffer->getBufferIdentifier() +
134 "' failed: " +
toString(std::move(Err)));
137 llvm::TimeTraceScope TimeScope(
"Module LoadIndex");
139 bool InGlobalIndexBlock =
false;
142 llvm::BitstreamEntry Entry;
146 Fail(Res.takeError());
148 switch (Entry.Kind) {
149 case llvm::BitstreamEntry::Error:
152 case llvm::BitstreamEntry::EndBlock:
153 if (InGlobalIndexBlock) {
154 InGlobalIndexBlock =
false;
161 case llvm::BitstreamEntry::Record:
163 if (InGlobalIndexBlock)
168 case llvm::BitstreamEntry::SubBlock:
169 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
170 if (llvm::Error Err =
Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
171 Fail(std::move(Err));
172 InGlobalIndexBlock =
true;
173 }
else if (llvm::Error Err =
Cursor.SkipBlock())
174 Fail(std::move(Err));
182 if (!MaybeIndexRecord)
183 Fail(MaybeIndexRecord.takeError());
184 IndexRecordTypes IndexRecord =
185 static_cast<IndexRecordTypes
>(MaybeIndexRecord.get());
186 switch (IndexRecord) {
198 if (ID == Modules.size())
199 Modules.push_back(ModuleInfo());
201 Modules.resize(ID + 1);
206 Modules[
ID].ModTime =
Record[Idx++];
210 Modules[
ID].FileName.assign(
Record.begin() + Idx,
215 unsigned NumDeps =
Record[Idx++];
216 Modules[
ID].Dependencies.insert(Modules[ID].Dependencies.end(),
218 Record.begin() + Idx + NumDeps);
222 assert(Idx ==
Record.size() &&
"More module info?");
227 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].
FileName);
229 ModuleName = ModuleName.rsplit(
'-').first;
230 UnresolvedModules[ModuleName] =
ID;
234 case IDENTIFIER_INDEX:
237 IdentifierIndex = IdentifierIndexTable::Create(
238 (
const unsigned char *)Blob.data() +
Record[0],
239 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
240 (
const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
248 delete static_cast<IdentifierIndexTable *
>(IdentifierIndex);
251std::pair<GlobalModuleIndex *, llvm::Error>
258 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
259 llvm::MemoryBuffer::getFile(IndexPath.c_str());
261 return std::make_pair(
nullptr,
262 llvm::errorCodeToError(BufferOrErr.getError()));
263 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
266 llvm::BitstreamCursor Cursor(*Buffer);
269 for (
unsigned char C : {
'B',
'C',
'G',
'I'}) {
272 return std::make_pair(
273 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
274 "expected signature BCGI"));
276 return std::make_pair(
nullptr, Res.takeError());
279 return std::make_pair(
new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
280 llvm::Error::success());
287 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
288 = ModulesByFile.find(
File);
289 if (Known == ModulesByFile.end())
293 Dependencies.clear();
295 for (
unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
297 Dependencies.push_back(MF);
305 if (!IdentifierIndex)
309 ++NumIdentifierLookups;
310 IdentifierIndexTable &Table
311 = *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
312 IdentifierIndexTable::iterator Known = Table.find(Name);
313 if (Known == Table.end()) {
318 for (
unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
323 ++NumIdentifierLookupHits;
329 StringRef Name =
File->ModuleName;
330 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
331 if (Known == UnresolvedModules.end()) {
336 ModuleInfo &Info = Modules[Known->second];
341 if (
File->File.getSize() == Info.Size &&
342 File->File.getModificationTime() == Info.ModTime) {
344 ModulesByFile[
File] = Known->second;
350 UnresolvedModules.erase(Known);
355 std::fprintf(stderr,
"*** Global Module Index Statistics:\n");
356 if (NumIdentifierLookups) {
357 fprintf(stderr,
" %u / %u identifier lookups succeeded (%f%%)\n",
358 NumIdentifierLookupHits, NumIdentifierLookups,
359 (
double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
361 std::fprintf(stderr,
"\n");
365 llvm::errs() <<
"*** Global Module Index Dump:\n";
366 llvm::errs() <<
"Module files:\n";
367 for (
auto &MI : Modules) {
368 llvm::errs() <<
"** " << MI.FileName <<
"\n";
372 llvm::errs() <<
"\n";
374 llvm::errs() <<
"\n";
383 struct ModuleFileInfo {
393 struct ImportedModuleFileInfo {
395 time_t StoredModTime;
398 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
402 class GlobalModuleIndexBuilder {
407 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
410 ModuleFilesMap ModuleFiles;
414 using ImportedModuleFilesMap =
415 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
418 ImportedModuleFilesMap ImportedModuleFiles;
422 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
426 InterestingIdentifierMap InterestingIdentifiers;
429 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
433 auto [It, Inserted] = ModuleFiles.try_emplace(
File);
435 unsigned NewID = ModuleFiles.size();
443 explicit GlobalModuleIndexBuilder(
445 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
452 bool writeIndex(llvm::BitstreamWriter &Stream);
457 llvm::BitstreamWriter &Stream,
461 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID,
Record);
464 if (!Name || Name[0] == 0)
return;
467 Record.push_back(*Name++);
468 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME,
Record);
472 llvm::BitstreamWriter &Stream,
477 Record.push_back(*Name++);
478 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME,
Record);
482GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
484 Stream.EnterBlockInfoBlock();
486#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
487#define RECORD(X) emitRecordID(X, #X, Stream, Record)
488 BLOCK(GLOBAL_INDEX_BLOCK);
499 class InterestingASTIdentifierLookupTrait
504 typedef std::pair<StringRef, bool> data_type;
506 data_type ReadData(
const internal_key_type& k,
507 const unsigned char* d,
511 using namespace llvm::support;
513 endian::readNext<IdentifierID, llvm::endianness::little>(d);
514 bool IsInteresting = RawID & 0x01;
515 return std::make_pair(k, IsInteresting);
525 return llvm::createStringError(Buffer.getError(),
526 "failed getting buffer for module file");
529 llvm::BitstreamCursor InStream(PCHContainerRdr.
ExtractPCH(**Buffer));
532 for (
unsigned char C : {
'C',
'P',
'C',
'H'})
535 return llvm::createStringError(std::errc::illegal_byte_sequence,
536 "expected signature CPCH");
538 return Res.takeError();
542 unsigned ID = getModuleFileInfo(
File).ID;
545 enum {
Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State =
Other;
550 return MaybeEntry.takeError();
551 llvm::BitstreamEntry Entry = MaybeEntry.get();
553 switch (Entry.Kind) {
554 case llvm::BitstreamEntry::Error:
558 case llvm::BitstreamEntry::Record:
560 if (State ==
Other) {
564 return Skipped.takeError();
570 case llvm::BitstreamEntry::SubBlock:
576 State = ControlBlock;
581 if (llvm::Error Err = InStream.EnterSubBlock(
AST_BLOCK_ID))
594 State = DiagnosticOptionsBlock;
598 if (llvm::Error Err = InStream.SkipBlock())
603 case llvm::BitstreamEntry::EndBlock:
613 return MaybeCode.takeError();
614 unsigned Code = MaybeCode.get();
617 if (State == ControlBlock && Code ==
IMPORT) {
629 Blob = Blob.substr(
Record[Idx++]);
635 off_t StoredSize = (off_t)
Record[Idx++];
636 time_t StoredModTime = (time_t)
Record[Idx++];
642 SignatureBytes.end());
646 unsigned Length =
Record[Idx++];
647 StringRef ImportedFile = Blob.substr(0, Length);
648 Blob = Blob.substr(Length);
656 return llvm::createStringError(std::errc::bad_file_descriptor,
657 "imported file \"%s\" not found",
658 std::string(ImportedFile).c_str());
662 ImportedModuleFiles.insert(std::make_pair(
663 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
667 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
668 getModuleFileInfo(
File).Dependencies.push_back(DependsOnID);
675 typedef llvm::OnDiskIterableChainedHashTable<
676 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
677 std::unique_ptr<InterestingIdentifierTable> Table(
678 InterestingIdentifierTable::Create(
679 (
const unsigned char *)Blob.data() +
Record[0],
680 (
const unsigned char *)Blob.data() +
sizeof(uint32_t),
681 (
const unsigned char *)Blob.data()));
682 for (InterestingIdentifierTable::data_iterator
D = Table->data_begin(),
683 DEnd = Table->data_end();
685 std::pair<StringRef, bool> Ident = *
D;
687 InterestingIdentifiers[Ident.first].push_back(ID);
689 (
void)InterestingIdentifiers[Ident.first];
694 if (State == DiagnosticOptionsBlock && Code ==
SIGNATURE) {
697 "Dummy AST file signature not backpatched in ASTWriter.");
698 getModuleFileInfo(
File).Signature = Signature;
704 return llvm::Error::success();
711class IdentifierIndexWriterTrait {
713 typedef StringRef key_type;
714 typedef StringRef key_type_ref;
717 typedef unsigned hash_value_type;
718 typedef unsigned offset_type;
720 static hash_value_type
ComputeHash(key_type_ref Key) {
721 return llvm::djbHash(Key);
724 std::pair<unsigned,unsigned>
725 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref
Data) {
726 using namespace llvm::support;
727 endian::Writer
LE(Out, llvm::endianness::little);
728 unsigned KeyLen = Key.size();
729 unsigned DataLen =
Data.size() * 4;
730 LE.write<uint16_t>(KeyLen);
731 LE.write<uint16_t>(DataLen);
732 return std::make_pair(KeyLen, DataLen);
735 void EmitKey(raw_ostream& Out, key_type_ref Key,
unsigned KeyLen) {
736 Out.write(Key.data(), KeyLen);
739 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref
Data,
741 using namespace llvm::support;
742 for (
unsigned I = 0, N =
Data.size(); I != N; ++I)
743 endian::write<uint32_t>(Out,
Data[I], llvm::endianness::little);
749bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
750 for (
auto MapEntry : ImportedModuleFiles) {
751 auto File = MapEntry.first;
752 ImportedModuleFileInfo &Info = MapEntry.second;
753 if (getModuleFileInfo(
File).Signature) {
754 if (getModuleFileInfo(
File).Signature != Info.StoredSignature)
757 }
else if (Info.StoredSize !=
File.getSize() ||
758 Info.StoredModTime !=
File.getModificationTime())
763 using namespace llvm;
764 llvm::TimeTraceScope TimeScope(
"Module WriteIndex");
767 Stream.Emit((
unsigned)
'B', 8);
768 Stream.Emit((
unsigned)
'C', 8);
769 Stream.Emit((
unsigned)
'G', 8);
770 Stream.Emit((
unsigned)
'I', 8);
774 emitBlockInfoBlock(Stream);
776 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
781 Stream.EmitRecord(INDEX_METADATA,
Record);
784 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
785 MEnd = ModuleFiles.end();
788 Record.push_back(M->second.ID);
789 Record.push_back(M->first.getSize());
790 Record.push_back(M->first.getModificationTime());
793 StringRef Name(M->first.getName());
794 Record.push_back(Name.size());
795 Record.append(Name.begin(), Name.end());
798 Record.push_back(M->second.Dependencies.size());
799 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
800 Stream.EmitRecord(MODULE,
Record);
805 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait>
Generator;
806 IdentifierIndexWriterTrait Trait;
809 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
810 IEnd = InterestingIdentifiers.end();
812 Generator.insert(I->first(), I->second, Trait);
819 using namespace llvm::support;
822 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
823 BucketOffset =
Generator.Emit(Out, Trait);
827 auto Abbrev = std::make_shared<BitCodeAbbrev>();
828 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
829 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
830 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
831 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
852 llvm::LockFileManager Locked(IndexPath);
854 case llvm::LockFileManager::LFS_Error:
855 return llvm::createStringError(std::errc::io_error,
"LFS error");
857 case llvm::LockFileManager::LFS_Owned:
861 case llvm::LockFileManager::LFS_Shared:
864 return llvm::createStringError(std::errc::device_or_resource_busy,
865 "someone else is building the index");
869 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
873 for (llvm::sys::fs::directory_iterator
D(
Path, EC), DEnd;
877 if (llvm::sys::path::extension(
D->path()) !=
".pcm") {
881 if (llvm::sys::path::extension(
D->path()) ==
".pcm.lock")
882 return llvm::createStringError(std::errc::device_or_resource_busy,
883 "someone else is building the index");
894 if (llvm::Error Err = Builder.loadModuleFile(*
ModuleFile))
901 llvm::BitstreamWriter OutputStream(OutputBuffer);
902 if (Builder.writeIndex(OutputStream))
903 return llvm::createStringError(std::errc::io_error,
904 "failed writing index");
907 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
909 return llvm::Error::success();
916 IdentifierIndexTable::key_iterator Current;
919 IdentifierIndexTable::key_iterator End;
922 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
923 Current = Idx.key_begin();
927 StringRef Next()
override {
931 StringRef Result = *Current;
939 IdentifierIndexTable &Table =
940 *
static_cast<IdentifierIndexTable *
>(IdentifierIndex);
941 return new GlobalIndexIdentifierIterator(Table);
Defines the clang::FileManager interface and associated types.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
#define IMPORT(DERIVED, BASE)
#define BLOCK(DERIVED, BASE)
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Implements support for file system lookup, file system caching, and directory search management.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(FileEntryRef Entry, bool isVolatile=false, bool RequiresNullTerminator=true, std::optional< int64_t > MaybeLimit=std::nullopt, bool IsText=true)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
OptionalFileEntryRef getOptionalFileRef(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Get a FileEntryRef if it exists, without doing anything on error.
A global index for a set of module files, providing information about the identifiers within those mo...
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given directory.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Information about a module that has been loaded by the ASTReader.
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
@ SIGNATURE
Record code for the signature that identifies this AST file.
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
@ IDENTIFIER_TABLE
Record code for the identifier table.
unsigned ComputeHash(Selector Sel)
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
Diagnostic wrappers for TextAPI types for error reporting.
The signature of a module, which is a hash of the AST content.
static constexpr size_t size
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
static ASTFileSignature createDummy()