clang 20.0.0git
GlobalModuleIndex.cpp
Go to the documentation of this file.
1//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the GlobalModuleIndex class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "ASTReaderInternals.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Bitstream/BitstreamReader.h"
24#include "llvm/Bitstream/BitstreamWriter.h"
25#include "llvm/Support/DJB.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/LockFileManager.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/OnDiskHashTable.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Support/TimeProfiler.h"
32#include "llvm/Support/raw_ostream.h"
33#include <cstdio>
34using namespace clang;
35using namespace serialization;
36
37//----------------------------------------------------------------------------//
38// Shared constants
39//----------------------------------------------------------------------------//
40namespace {
41 enum {
42 /// The block containing the index.
43 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
44 };
45
46 /// Describes the record types in the index.
47 enum IndexRecordTypes {
48 /// Contains version information and potentially other metadata,
49 /// used to determine if we can read this global index file.
50 INDEX_METADATA,
51 /// Describes a module, including its file name and dependencies.
52 MODULE,
53 /// The index for identifiers.
54 IDENTIFIER_INDEX
55 };
56}
57
/// File name of the global index; it is appended to the directory path
/// handed to the reader and the writer.
static constexpr const char *IndexFileName = "modules.idx";

/// Version stamped into the INDEX_METADATA record. The reader ignores an
/// index whose stored version differs from this value.
static constexpr unsigned CurrentVersion = 1;
63
64//----------------------------------------------------------------------------//
65// Global module index reader.
66//----------------------------------------------------------------------------//
67
68namespace {
69
70/// Trait used to read the identifier index from the on-disk hash
71/// table.
72class IdentifierIndexReaderTrait {
73public:
74 typedef StringRef external_key_type;
75 typedef StringRef internal_key_type;
76 typedef SmallVector<unsigned, 2> data_type;
77 typedef unsigned hash_value_type;
78 typedef unsigned offset_type;
79
80 static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
81 return a == b;
82 }
83
84 static hash_value_type ComputeHash(const internal_key_type& a) {
85 return llvm::djbHash(a);
86 }
87
88 static std::pair<unsigned, unsigned>
89 ReadKeyDataLength(const unsigned char*& d) {
90 using namespace llvm::support;
91 unsigned KeyLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
92 unsigned DataLen = endian::readNext<uint16_t, llvm::endianness::little>(d);
93 return std::make_pair(KeyLen, DataLen);
94 }
95
96 static const internal_key_type&
97 GetInternalKey(const external_key_type& x) { return x; }
98
99 static const external_key_type&
100 GetExternalKey(const internal_key_type& x) { return x; }
101
102 static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
103 return StringRef((const char *)d, n);
104 }
105
106 static data_type ReadData(const internal_key_type& k,
107 const unsigned char* d,
108 unsigned DataLen) {
109 using namespace llvm::support;
110
111 data_type Result;
112 while (DataLen > 0) {
113 unsigned ID = endian::readNext<uint32_t, llvm::endianness::little>(d);
114 Result.push_back(ID);
115 DataLen -= 4;
116 }
117
118 return Result;
119 }
120};
121
122typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
123 IdentifierIndexTable;
124
125}
126
127GlobalModuleIndex::GlobalModuleIndex(
128 std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
129 llvm::BitstreamCursor Cursor)
130 : Buffer(std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
131 NumIdentifierLookupHits() {
132 auto Fail = [&](llvm::Error &&Err) {
133 report_fatal_error("Module index '" + Buffer->getBufferIdentifier() +
134 "' failed: " + toString(std::move(Err)));
135 };
136
137 llvm::TimeTraceScope TimeScope("Module LoadIndex");
138 // Read the global index.
139 bool InGlobalIndexBlock = false;
140 bool Done = false;
141 while (!Done) {
142 llvm::BitstreamEntry Entry;
143 if (Expected<llvm::BitstreamEntry> Res = Cursor.advance())
144 Entry = Res.get();
145 else
146 Fail(Res.takeError());
147
148 switch (Entry.Kind) {
149 case llvm::BitstreamEntry::Error:
150 return;
151
152 case llvm::BitstreamEntry::EndBlock:
153 if (InGlobalIndexBlock) {
154 InGlobalIndexBlock = false;
155 Done = true;
156 continue;
157 }
158 return;
159
160
161 case llvm::BitstreamEntry::Record:
162 // Entries in the global index block are handled below.
163 if (InGlobalIndexBlock)
164 break;
165
166 return;
167
168 case llvm::BitstreamEntry::SubBlock:
169 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
170 if (llvm::Error Err = Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
171 Fail(std::move(Err));
172 InGlobalIndexBlock = true;
173 } else if (llvm::Error Err = Cursor.SkipBlock())
174 Fail(std::move(Err));
175 continue;
176 }
177
179 StringRef Blob;
180 Expected<unsigned> MaybeIndexRecord =
181 Cursor.readRecord(Entry.ID, Record, &Blob);
182 if (!MaybeIndexRecord)
183 Fail(MaybeIndexRecord.takeError());
184 IndexRecordTypes IndexRecord =
185 static_cast<IndexRecordTypes>(MaybeIndexRecord.get());
186 switch (IndexRecord) {
187 case INDEX_METADATA:
188 // Make sure that the version matches.
189 if (Record.size() < 1 || Record[0] != CurrentVersion)
190 return;
191 break;
192
193 case MODULE: {
194 unsigned Idx = 0;
195 unsigned ID = Record[Idx++];
196
197 // Make room for this module's information.
198 if (ID == Modules.size())
199 Modules.push_back(ModuleInfo());
200 else
201 Modules.resize(ID + 1);
202
203 // Size/modification time for this module file at the time the
204 // global index was built.
205 Modules[ID].Size = Record[Idx++];
206 Modules[ID].ModTime = Record[Idx++];
207
208 // File name.
209 unsigned NameLen = Record[Idx++];
210 Modules[ID].FileName.assign(Record.begin() + Idx,
211 Record.begin() + Idx + NameLen);
212 Idx += NameLen;
213
214 // Dependencies
215 unsigned NumDeps = Record[Idx++];
216 Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
217 Record.begin() + Idx,
218 Record.begin() + Idx + NumDeps);
219 Idx += NumDeps;
220
221 // Make sure we're at the end of the record.
222 assert(Idx == Record.size() && "More module info?");
223
224 // Record this module as an unresolved module.
225 // FIXME: this doesn't work correctly for module names containing path
226 // separators.
227 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName);
228 // Remove the -<hash of ModuleMapPath>
229 ModuleName = ModuleName.rsplit('-').first;
230 UnresolvedModules[ModuleName] = ID;
231 break;
232 }
233
234 case IDENTIFIER_INDEX:
235 // Wire up the identifier index.
236 if (Record[0]) {
237 IdentifierIndex = IdentifierIndexTable::Create(
238 (const unsigned char *)Blob.data() + Record[0],
239 (const unsigned char *)Blob.data() + sizeof(uint32_t),
240 (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
241 }
242 break;
243 }
244 }
245}
246
248 delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
249}
250
251std::pair<GlobalModuleIndex *, llvm::Error>
253 // Load the index file, if it's there.
254 llvm::SmallString<128> IndexPath;
255 IndexPath += Path;
256 llvm::sys::path::append(IndexPath, IndexFileName);
257
258 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
259 llvm::MemoryBuffer::getFile(IndexPath.c_str());
260 if (!BufferOrErr)
261 return std::make_pair(nullptr,
262 llvm::errorCodeToError(BufferOrErr.getError()));
263 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
264
265 /// The main bitstream cursor for the main block.
266 llvm::BitstreamCursor Cursor(*Buffer);
267
268 // Sniff for the signature.
269 for (unsigned char C : {'B', 'C', 'G', 'I'}) {
270 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Cursor.Read(8)) {
271 if (Res.get() != C)
272 return std::make_pair(
273 nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
274 "expected signature BCGI"));
275 } else
276 return std::make_pair(nullptr, Res.takeError());
277 }
278
279 return std::make_pair(new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
280 llvm::Error::success());
281}
282
285 SmallVectorImpl<ModuleFile *> &Dependencies) {
286 // Look for information about this module file.
287 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
288 = ModulesByFile.find(File);
289 if (Known == ModulesByFile.end())
290 return;
291
292 // Record dependencies.
293 Dependencies.clear();
294 ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
295 for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
296 if (ModuleFile *MF = Modules[I].File)
297 Dependencies.push_back(MF);
298 }
299}
300
301bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
302 Hits.clear();
303
304 // If there's no identifier index, there is nothing we can do.
305 if (!IdentifierIndex)
306 return false;
307
308 // Look into the identifier index.
309 ++NumIdentifierLookups;
310 IdentifierIndexTable &Table
311 = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
312 IdentifierIndexTable::iterator Known = Table.find(Name);
313 if (Known == Table.end()) {
314 return false;
315 }
316
317 SmallVector<unsigned, 2> ModuleIDs = *Known;
318 for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
319 if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
320 Hits.insert(MF);
321 }
322
323 ++NumIdentifierLookupHits;
324 return true;
325}
326
328 // Look for the module in the global module index based on the module name.
329 StringRef Name = File->ModuleName;
330 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
331 if (Known == UnresolvedModules.end()) {
332 return true;
333 }
334
335 // Rectify this module with the global module index.
336 ModuleInfo &Info = Modules[Known->second];
337
338 // If the size and modification time match what we expected, record this
339 // module file.
340 bool Failed = true;
341 if (File->File.getSize() == Info.Size &&
342 File->File.getModificationTime() == Info.ModTime) {
343 Info.File = File;
344 ModulesByFile[File] = Known->second;
345
346 Failed = false;
347 }
348
349 // One way or another, we have resolved this module file.
350 UnresolvedModules.erase(Known);
351 return Failed;
352}
353
355 std::fprintf(stderr, "*** Global Module Index Statistics:\n");
356 if (NumIdentifierLookups) {
357 fprintf(stderr, " %u / %u identifier lookups succeeded (%f%%)\n",
358 NumIdentifierLookupHits, NumIdentifierLookups,
359 (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
360 }
361 std::fprintf(stderr, "\n");
362}
363
364LLVM_DUMP_METHOD void GlobalModuleIndex::dump() {
365 llvm::errs() << "*** Global Module Index Dump:\n";
366 llvm::errs() << "Module files:\n";
367 for (auto &MI : Modules) {
368 llvm::errs() << "** " << MI.FileName << "\n";
369 if (MI.File)
370 MI.File->dump();
371 else
372 llvm::errs() << "\n";
373 }
374 llvm::errs() << "\n";
375}
376
377//----------------------------------------------------------------------------//
378// Global module index writer.
379//----------------------------------------------------------------------------//
380
381namespace {
382 /// Provides information about a specific module file.
383 struct ModuleFileInfo {
384 /// The numberic ID for this module file.
385 unsigned ID;
386
387 /// The set of modules on which this module depends. Each entry is
388 /// a module ID.
389 SmallVector<unsigned, 4> Dependencies;
390 ASTFileSignature Signature;
391 };
392
393 struct ImportedModuleFileInfo {
394 off_t StoredSize;
395 time_t StoredModTime;
396 ASTFileSignature StoredSignature;
397 ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig)
398 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
399 };
400
401 /// Builder that generates the global module index file.
402 class GlobalModuleIndexBuilder {
403 FileManager &FileMgr;
404 const PCHContainerReader &PCHContainerRdr;
405
406 /// Mapping from files to module file information.
407 using ModuleFilesMap = llvm::MapVector<FileEntryRef, ModuleFileInfo>;
408
409 /// Information about each of the known module files.
410 ModuleFilesMap ModuleFiles;
411
412 /// Mapping from the imported module file to the imported
413 /// information.
414 using ImportedModuleFilesMap =
415 std::multimap<FileEntryRef, ImportedModuleFileInfo>;
416
417 /// Information about each importing of a module file.
418 ImportedModuleFilesMap ImportedModuleFiles;
419
420 /// Mapping from identifiers to the list of module file IDs that
421 /// consider this identifier to be interesting.
422 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
423
424 /// A mapping from all interesting identifiers to the set of module
425 /// files in which those identifiers are considered interesting.
426 InterestingIdentifierMap InterestingIdentifiers;
427
428 /// Write the block-info block for the global module index file.
429 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
430
431 /// Retrieve the module file information for the given file.
432 ModuleFileInfo &getModuleFileInfo(FileEntryRef File) {
433 auto [It, Inserted] = ModuleFiles.try_emplace(File);
434 if (Inserted) {
435 unsigned NewID = ModuleFiles.size();
436 ModuleFileInfo &Info = It->second;
437 Info.ID = NewID;
438 }
439 return It->second;
440 }
441
442 public:
443 explicit GlobalModuleIndexBuilder(
444 FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr)
445 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
446
447 /// Load the contents of the given module file into the builder.
448 llvm::Error loadModuleFile(FileEntryRef File);
449
450 /// Write the index to the given bitstream.
451 /// \returns true if an error occurred, false otherwise.
452 bool writeIndex(llvm::BitstreamWriter &Stream);
453 };
454}
455
456static void emitBlockID(unsigned ID, const char *Name,
457 llvm::BitstreamWriter &Stream,
459 Record.clear();
460 Record.push_back(ID);
461 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
462
463 // Emit the block name if present.
464 if (!Name || Name[0] == 0) return;
465 Record.clear();
466 while (*Name)
467 Record.push_back(*Name++);
468 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
469}
470
471static void emitRecordID(unsigned ID, const char *Name,
472 llvm::BitstreamWriter &Stream,
474 Record.clear();
475 Record.push_back(ID);
476 while (*Name)
477 Record.push_back(*Name++);
478 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
479}
480
481void
482GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
484 Stream.EnterBlockInfoBlock();
485
486#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
487#define RECORD(X) emitRecordID(X, #X, Stream, Record)
488 BLOCK(GLOBAL_INDEX_BLOCK);
489 RECORD(INDEX_METADATA);
490 RECORD(MODULE);
491 RECORD(IDENTIFIER_INDEX);
492#undef RECORD
493#undef BLOCK
494
495 Stream.ExitBlock();
496}
497
498namespace {
499 class InterestingASTIdentifierLookupTrait
501
502 public:
503 /// The identifier and whether it is "interesting".
504 typedef std::pair<StringRef, bool> data_type;
505
506 data_type ReadData(const internal_key_type& k,
507 const unsigned char* d,
508 unsigned DataLen) {
509 // The first bit indicates whether this identifier is interesting.
510 // That's all we care about.
511 using namespace llvm::support;
512 IdentifierID RawID =
513 endian::readNext<IdentifierID, llvm::endianness::little>(d);
514 bool IsInteresting = RawID & 0x01;
515 return std::make_pair(k, IsInteresting);
516 }
517 };
518}
519
520llvm::Error GlobalModuleIndexBuilder::loadModuleFile(FileEntryRef File) {
521 // Open the module file.
522
523 auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true);
524 if (!Buffer)
525 return llvm::createStringError(Buffer.getError(),
526 "failed getting buffer for module file");
527
528 // Initialize the input stream
529 llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer));
530
531 // Sniff for the signature.
532 for (unsigned char C : {'C', 'P', 'C', 'H'})
533 if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = InStream.Read(8)) {
534 if (Res.get() != C)
535 return llvm::createStringError(std::errc::illegal_byte_sequence,
536 "expected signature CPCH");
537 } else
538 return Res.takeError();
539
540 // Record this module file and assign it a unique ID (if it doesn't have
541 // one already).
542 unsigned ID = getModuleFileInfo(File).ID;
543
544 // Search for the blocks and records we care about.
545 enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other;
546 bool Done = false;
547 while (!Done) {
548 Expected<llvm::BitstreamEntry> MaybeEntry = InStream.advance();
549 if (!MaybeEntry)
550 return MaybeEntry.takeError();
551 llvm::BitstreamEntry Entry = MaybeEntry.get();
552
553 switch (Entry.Kind) {
554 case llvm::BitstreamEntry::Error:
555 Done = true;
556 continue;
557
558 case llvm::BitstreamEntry::Record:
559 // In the 'other' state, just skip the record. We don't care.
560 if (State == Other) {
561 if (llvm::Expected<unsigned> Skipped = InStream.skipRecord(Entry.ID))
562 continue;
563 else
564 return Skipped.takeError();
565 }
566
567 // Handle potentially-interesting records below.
568 break;
569
570 case llvm::BitstreamEntry::SubBlock:
571 if (Entry.ID == CONTROL_BLOCK_ID) {
572 if (llvm::Error Err = InStream.EnterSubBlock(CONTROL_BLOCK_ID))
573 return Err;
574
575 // Found the control block.
576 State = ControlBlock;
577 continue;
578 }
579
580 if (Entry.ID == AST_BLOCK_ID) {
581 if (llvm::Error Err = InStream.EnterSubBlock(AST_BLOCK_ID))
582 return Err;
583
584 // Found the AST block.
585 State = ASTBlock;
586 continue;
587 }
588
589 if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) {
590 if (llvm::Error Err = InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID))
591 return Err;
592
593 // Found the Diagnostic Options block.
594 State = DiagnosticOptionsBlock;
595 continue;
596 }
597
598 if (llvm::Error Err = InStream.SkipBlock())
599 return Err;
600
601 continue;
602
603 case llvm::BitstreamEntry::EndBlock:
604 State = Other;
605 continue;
606 }
607
608 // Read the given record.
610 StringRef Blob;
611 Expected<unsigned> MaybeCode = InStream.readRecord(Entry.ID, Record, &Blob);
612 if (!MaybeCode)
613 return MaybeCode.takeError();
614 unsigned Code = MaybeCode.get();
615
616 // Handle module dependencies.
617 if (State == ControlBlock && Code == IMPORT) {
618 unsigned Idx = 0;
619 // Read information about the AST file.
620
621 // Skip the imported kind
622 ++Idx;
623
624 // Skip the import location
625 ++Idx;
626
627 // Skip the module name (currently this is only used for prebuilt
628 // modules while here we are only dealing with cached).
629 Blob = Blob.substr(Record[Idx++]);
630
631 // Skip if it is standard C++ module
632 ++Idx;
633
634 // Load stored size/modification time.
635 off_t StoredSize = (off_t)Record[Idx++];
636 time_t StoredModTime = (time_t)Record[Idx++];
637
638 // Skip the stored signature.
639 // FIXME: we could read the signature out of the import and validate it.
640 StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size);
641 auto StoredSignature = ASTFileSignature::create(SignatureBytes.begin(),
642 SignatureBytes.end());
643 Blob = Blob.substr(ASTFileSignature::size);
644
645 // Retrieve the imported file name.
646 unsigned Length = Record[Idx++];
647 StringRef ImportedFile = Blob.substr(0, Length);
648 Blob = Blob.substr(Length);
649
650 // Find the imported module file.
651 auto DependsOnFile =
652 FileMgr.getOptionalFileRef(ImportedFile, /*OpenFile=*/false,
653 /*CacheFailure=*/false);
654
655 if (!DependsOnFile)
656 return llvm::createStringError(std::errc::bad_file_descriptor,
657 "imported file \"%s\" not found",
658 std::string(ImportedFile).c_str());
659
660 // Save the information in ImportedModuleFileInfo so we can verify after
661 // loading all pcms.
662 ImportedModuleFiles.insert(std::make_pair(
663 *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
664 StoredSignature)));
665
666 // Record the dependency.
667 unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
668 getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
669
670 continue;
671 }
672
673 // Handle the identifier table
674 if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
675 typedef llvm::OnDiskIterableChainedHashTable<
676 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
677 std::unique_ptr<InterestingIdentifierTable> Table(
678 InterestingIdentifierTable::Create(
679 (const unsigned char *)Blob.data() + Record[0],
680 (const unsigned char *)Blob.data() + sizeof(uint32_t),
681 (const unsigned char *)Blob.data()));
682 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
683 DEnd = Table->data_end();
684 D != DEnd; ++D) {
685 std::pair<StringRef, bool> Ident = *D;
686 if (Ident.second)
687 InterestingIdentifiers[Ident.first].push_back(ID);
688 else
689 (void)InterestingIdentifiers[Ident.first];
690 }
691 }
692
693 // Get Signature.
694 if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
695 auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
696 assert(Signature != ASTFileSignature::createDummy() &&
697 "Dummy AST file signature not backpatched in ASTWriter.");
698 getModuleFileInfo(File).Signature = Signature;
699 }
700
701 // We don't care about this record.
702 }
703
704 return llvm::Error::success();
705}
706
707namespace {
708
709/// Trait used to generate the identifier index as an on-disk hash
710/// table.
711class IdentifierIndexWriterTrait {
712public:
713 typedef StringRef key_type;
714 typedef StringRef key_type_ref;
715 typedef SmallVector<unsigned, 2> data_type;
716 typedef const SmallVector<unsigned, 2> &data_type_ref;
717 typedef unsigned hash_value_type;
718 typedef unsigned offset_type;
719
720 static hash_value_type ComputeHash(key_type_ref Key) {
721 return llvm::djbHash(Key);
722 }
723
724 std::pair<unsigned,unsigned>
725 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
726 using namespace llvm::support;
727 endian::Writer LE(Out, llvm::endianness::little);
728 unsigned KeyLen = Key.size();
729 unsigned DataLen = Data.size() * 4;
730 LE.write<uint16_t>(KeyLen);
731 LE.write<uint16_t>(DataLen);
732 return std::make_pair(KeyLen, DataLen);
733 }
734
735 void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
736 Out.write(Key.data(), KeyLen);
737 }
738
739 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
740 unsigned DataLen) {
741 using namespace llvm::support;
742 for (unsigned I = 0, N = Data.size(); I != N; ++I)
743 endian::write<uint32_t>(Out, Data[I], llvm::endianness::little);
744 }
745};
746
747}
748
749bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
750 for (auto MapEntry : ImportedModuleFiles) {
751 auto File = MapEntry.first;
752 ImportedModuleFileInfo &Info = MapEntry.second;
753 if (getModuleFileInfo(File).Signature) {
754 if (getModuleFileInfo(File).Signature != Info.StoredSignature)
755 // Verify Signature.
756 return true;
757 } else if (Info.StoredSize != File.getSize() ||
758 Info.StoredModTime != File.getModificationTime())
759 // Verify Size and ModTime.
760 return true;
761 }
762
763 using namespace llvm;
764 llvm::TimeTraceScope TimeScope("Module WriteIndex");
765
766 // Emit the file header.
767 Stream.Emit((unsigned)'B', 8);
768 Stream.Emit((unsigned)'C', 8);
769 Stream.Emit((unsigned)'G', 8);
770 Stream.Emit((unsigned)'I', 8);
771
772 // Write the block-info block, which describes the records in this bitcode
773 // file.
774 emitBlockInfoBlock(Stream);
775
776 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
777
778 // Write the metadata.
780 Record.push_back(CurrentVersion);
781 Stream.EmitRecord(INDEX_METADATA, Record);
782
783 // Write the set of known module files.
784 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
785 MEnd = ModuleFiles.end();
786 M != MEnd; ++M) {
787 Record.clear();
788 Record.push_back(M->second.ID);
789 Record.push_back(M->first.getSize());
790 Record.push_back(M->first.getModificationTime());
791
792 // File name
793 StringRef Name(M->first.getName());
794 Record.push_back(Name.size());
795 Record.append(Name.begin(), Name.end());
796
797 // Dependencies
798 Record.push_back(M->second.Dependencies.size());
799 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
800 Stream.EmitRecord(MODULE, Record);
801 }
802
803 // Write the identifier -> module file mapping.
804 {
805 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
806 IdentifierIndexWriterTrait Trait;
807
808 // Populate the hash table.
809 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
810 IEnd = InterestingIdentifiers.end();
811 I != IEnd; ++I) {
812 Generator.insert(I->first(), I->second, Trait);
813 }
814
815 // Create the on-disk hash table in a buffer.
817 uint32_t BucketOffset;
818 {
819 using namespace llvm::support;
820 llvm::raw_svector_ostream Out(IdentifierTable);
821 // Make sure that no bucket is at offset 0
822 endian::write<uint32_t>(Out, 0, llvm::endianness::little);
823 BucketOffset = Generator.Emit(Out, Trait);
824 }
825
826 // Create a blob abbreviation
827 auto Abbrev = std::make_shared<BitCodeAbbrev>();
828 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
829 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
830 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
831 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
832
833 // Write the identifier table
834 uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
835 Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
836 }
837
838 Stream.ExitBlock();
839 return false;
840}
841
842llvm::Error
844 const PCHContainerReader &PCHContainerRdr,
845 StringRef Path) {
846 llvm::SmallString<128> IndexPath;
847 IndexPath += Path;
848 llvm::sys::path::append(IndexPath, IndexFileName);
849
850 // Coordinate building the global index file with other processes that might
851 // try to do the same.
852 llvm::LockFileManager Locked(IndexPath);
853 switch (Locked) {
854 case llvm::LockFileManager::LFS_Error:
855 return llvm::createStringError(std::errc::io_error, "LFS error");
856
857 case llvm::LockFileManager::LFS_Owned:
858 // We're responsible for building the index ourselves. Do so below.
859 break;
860
861 case llvm::LockFileManager::LFS_Shared:
862 // Someone else is responsible for building the index. We don't care
863 // when they finish, so we're done.
864 return llvm::createStringError(std::errc::device_or_resource_busy,
865 "someone else is building the index");
866 }
867
868 // The module index builder.
869 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
870
871 // Load each of the module files.
872 std::error_code EC;
873 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
874 D != DEnd && !EC;
875 D.increment(EC)) {
876 // If this isn't a module file, we don't care.
877 if (llvm::sys::path::extension(D->path()) != ".pcm") {
878 // ... unless it's a .pcm.lock file, which indicates that someone is
879 // in the process of rebuilding a module. They'll rebuild the index
880 // at the end of that translation unit, so we don't have to.
881 if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
882 return llvm::createStringError(std::errc::device_or_resource_busy,
883 "someone else is building the index");
884
885 continue;
886 }
887
888 // If we can't find the module file, skip it.
889 auto ModuleFile = FileMgr.getOptionalFileRef(D->path());
890 if (!ModuleFile)
891 continue;
892
893 // Load this module file.
894 if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
895 return Err;
896 }
897
898 // The output buffer, into which the global index will be written.
899 SmallString<16> OutputBuffer;
900 {
901 llvm::BitstreamWriter OutputStream(OutputBuffer);
902 if (Builder.writeIndex(OutputStream))
903 return llvm::createStringError(std::errc::io_error,
904 "failed writing index");
905 }
906
907 return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
908 OS << OutputBuffer;
909 return llvm::Error::success();
910 });
911}
912
913namespace {
914 class GlobalIndexIdentifierIterator : public IdentifierIterator {
915 /// The current position within the identifier lookup table.
916 IdentifierIndexTable::key_iterator Current;
917
918 /// The end position within the identifier lookup table.
919 IdentifierIndexTable::key_iterator End;
920
921 public:
922 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
923 Current = Idx.key_begin();
924 End = Idx.key_end();
925 }
926
927 StringRef Next() override {
928 if (Current == End)
929 return StringRef();
930
931 StringRef Result = *Current;
932 ++Current;
933 return Result;
934 }
935 };
936}
937
939 IdentifierIndexTable &Table =
940 *static_cast<IdentifierIndexTable *>(IdentifierIndex);
941 return new GlobalIndexIdentifierIterator(Table);
942}
#define RECORD(X)
static char ID
Definition: Arena.cpp:183
const Decl * D
IndirectLocalPath & Path
Defines the clang::FileManager interface and associated types.
static void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
static const unsigned CurrentVersion
The global index file version.
static const char *const IndexFileName
The name of the global index file.
static void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, SmallVectorImpl< uint64_t > &Record)
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
unsigned NameLen
const char * Data
#define IMPORT(DERIVED, BASE)
Definition: Template.h:618
#define BLOCK(DERIVED, BASE)
Definition: Template.h:631
__device__ __2f16 b
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition: FileEntry.h:57
Implements support for file system lookup, file system caching, and directory search management.
Definition: FileManager.h:53
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(FileEntryRef Entry, bool isVolatile=false, bool RequiresNullTerminator=true, std::optional< int64_t > MaybeLimit=std::nullopt, bool IsText=true)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
OptionalFileEntryRef getOptionalFileRef(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Get a FileEntryRef if it exists, without doing anything on error.
Definition: FileManager.h:245
A global index for a set of module files, providing information about the identifiers within those mo...
bool loadedModuleFile(ModuleFile *File)
Note that the given module file has been loaded.
void printStats()
Print statistics to standard error.
bool lookupIdentifier(llvm::StringRef Name, HitSet &Hits)
Look for all of the module files with information about the given identifier, e.g....
void getModuleDependencies(ModuleFile *File, llvm::SmallVectorImpl< ModuleFile * > &Dependencies)
Retrieve the set of module files on which the given module file directly depends.
IdentifierIterator * createIdentifierIterator() const
Returns an iterator for identifiers stored in the index table.
static std::pair< GlobalModuleIndex *, llvm::Error > readIndex(llvm::StringRef Path)
Read a global index file for the given directory.
void dump()
Print debugging view to standard error.
static llvm::Error writeIndex(FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr, llvm::StringRef Path)
Write a global index into the given directory.
An iterator that walks over all of the known identifiers in the lookup table.
Implements an efficient mapping from strings to IdentifierInfo nodes.
This abstract interface provides operations for unwrapping containers for serialized ASTs (precompile...
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const =0
Returns the serialized AST inside the PCH container Buffer.
Information about a module that has been loaded by the ASTReader.
Definition: ModuleFile.h:130
Base class for the trait describing the on-disk hash table for the identifiers in an AST file.
@ ModuleFileInfo
Dump information about a module file.
bool LE(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1171
@ AST_BLOCK_ID
The AST block, which acts as a container around the full AST block.
Definition: ASTBitCodes.h:296
@ CONTROL_BLOCK_ID
The control block, which contains all of the information that needs to be validated prior to committi...
Definition: ASTBitCodes.h:322
@ UNHASHED_CONTROL_BLOCK_ID
A block with unhashed content.
Definition: ASTBitCodes.h:344
@ SIGNATURE
Record code for the signature that identifies this AST file.
Definition: ASTBitCodes.h:407
uint64_t IdentifierID
An ID number that refers to an identifier in an AST file.
Definition: ASTBitCodes.h:63
@ IDENTIFIER_TABLE
Record code for the identifier table.
Definition: ASTBitCodes.h:500
unsigned ComputeHash(Selector Sel)
Definition: ASTCommon.cpp:297
std::shared_ptr< MatchComputation< T > > Generator
Definition: RewriteRule.h:65
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
unsigned long uint64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
The signature of a module, which is a hash of the AST content.
Definition: Module.h:58
static constexpr size_t size
Definition: Module.h:61
static ASTFileSignature create(std::array< uint8_t, 20 > Bytes)
Definition: Module.h:76
static ASTFileSignature createDummy()
Definition: Module.h:86