clang 20.0.0git
SourceManager.cpp
Go to the documentation of this file.
1//===- SourceManager.cpp - Track and cache source files -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SourceManager interface.
10//
11//===----------------------------------------------------------------------===//
12
16#include "clang/Basic/LLVM.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/ADT/StringSwitch.h"
26#include "llvm/Support/Allocator.h"
27#include "llvm/Support/AutoConvert.h"
28#include "llvm/Support/Capacity.h"
29#include "llvm/Support/Compiler.h"
30#include "llvm/Support/Endian.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/FileSystem.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Support/Path.h"
36#include "llvm/Support/raw_ostream.h"
37#include <algorithm>
38#include <cassert>
39#include <cstddef>
40#include <cstdint>
41#include <memory>
42#include <optional>
43#include <tuple>
44#include <utility>
45#include <vector>
46
47using namespace clang;
48using namespace SrcMgr;
49using llvm::MemoryBuffer;
50
51#define DEBUG_TYPE "source-manager"
52
53// Reaching a limit of 2^31 results in a hard error. This metric makes it
54// possible to track whether a particular compiler invocation is close to it.
55STATISTIC(MaxUsedSLocBytes, "Maximum number of bytes used by source locations "
56 "(both loaded and local).");
57
58//===----------------------------------------------------------------------===//
59// SourceManager Helper Classes
60//===----------------------------------------------------------------------===//
61
// NOTE(review): the line that should open this definition (embedded listing
// line 64, between the comment and the return statement) is absent from this
// listing; restore the signature before building.
62/// getSizeBytesMapped - Returns the number of bytes actually mapped for this
63/// ContentCache. This can be 0 if the MemBuffer was not actually expanded.
// Yields the size of Buffer when one is present, and 0 otherwise.
65 return Buffer ? Buffer->getBufferSize() : 0;
66}
67
68/// Returns the kind of memory used to back the memory buffer for
69/// this content cache. This is used for performance analysis.
70llvm::MemoryBuffer::BufferKind ContentCache::getMemoryBufferKind() const {
71 if (Buffer == nullptr) {
72 assert(0 && "Buffer should never be null");
73 return llvm::MemoryBuffer::MemoryBuffer_Malloc;
74 }
75 return Buffer->getBufferKind();
76}
77
78/// getSize - Returns the size of the content encapsulated by this ContentCache.
79/// This can be the size of the source file or the size of an arbitrary
80/// scratch buffer. If the ContentCache encapsulates a source file, that
81/// file is not lazily brought in from disk to satisfy this query.
82unsigned ContentCache::getSize() const {
// NOTE(review): the conditional below has no ':' arm in this listing (embedded
// listing line 84 is absent). Confirm the fallback expression used when Buffer
// is null against the upstream file before building.
// When Buffer is present its size is narrowed to unsigned.
83 return Buffer ? (unsigned)Buffer->getBufferSize()
85}
86
87const char *ContentCache::getInvalidBOM(StringRef BufStr) {
88 // If the buffer is valid, check to see if it has a UTF Byte Order Mark
89 // (BOM). We only support UTF-8 with and without a BOM right now. See
90 // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
91 const char *InvalidBOM =
92 llvm::StringSwitch<const char *>(BufStr)
93 .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
94 "UTF-32 (BE)")
95 .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
96 "UTF-32 (LE)")
97 .StartsWith("\xFE\xFF", "UTF-16 (BE)")
98 .StartsWith("\xFF\xFE", "UTF-16 (LE)")
99 .StartsWith("\x2B\x2F\x76", "UTF-7")
100 .StartsWith("\xF7\x64\x4C", "UTF-1")
101 .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
102 .StartsWith("\x0E\xFE\xFF", "SCSU")
103 .StartsWith("\xFB\xEE\x28", "BOCU-1")
104 .StartsWith("\x84\x31\x95\x33", "GB-18030")
105 .Default(nullptr);
106
107 return InvalidBOM;
108}
109
// Lazily loads and validates the buffer behind this content cache.
//
// Returns the cached buffer when one is already present. Returns std::nullopt
// when the buffer was previously found invalid, when there is no
// ContentsEntry, or when loading/validation fails. Each failure on the loading
// path (cannot open, too large, size mismatch, unsupported BOM) reports a
// diagnostic at Loc through Diag.
//
// NOTE(review): embedded listing line 111, which should name this member
// function and declare its leading parameters (Diag and FM are used below), is
// absent from this listing; restore it before building.
110std::optional<llvm::MemoryBufferRef>
112 SourceLocation Loc) const {
113 // Lazily create the Buffer for ContentCaches that wrap files. If we already
114 // computed it, just return what we have.
115 if (IsBufferInvalid)
116 return std::nullopt;
117 if (Buffer)
118 return Buffer->getMemBufferRef();
119 if (!ContentsEntry)
120 return std::nullopt;
121
122 // Start with the assumption that the buffer is invalid to simplify early
123 // return paths.
// Every early return below therefore leaves IsBufferInvalid set, so later calls
// return std::nullopt without retrying the load.
124 IsBufferInvalid = true;
125
126 auto BufferOrError = FM.getBufferForFile(*ContentsEntry, IsFileVolatile);
127
128 // If we were unable to open the file, then we are in an inconsistent
129 // situation where the content cache referenced a file which no longer
130 // exists. Most likely, we were using a stat cache with an invalid entry but
131 // the file could also have been removed during processing. Since we can't
132 // really deal with this situation, just create an empty buffer.
// NOTE(review): the branch below reports the error and returns std::nullopt; no
// empty buffer is created here, so the sentence above looks stale.
133 if (!BufferOrError) {
134 Diag.Report(Loc, diag::err_cannot_open_file)
135 << ContentsEntry->getName() << BufferOrError.getError().message();
136
137 return std::nullopt;
138 }
139
140 Buffer = std::move(*BufferOrError);
141
142 // Check that the file's size fits in an 'unsigned' (with room for a
143 // past-the-end value). This is deeply regrettable, but various parts of
144 // Clang (including elsewhere in this file!) use 'unsigned' to represent file
145 // offsets, line numbers, string literal lengths, and so on, and fail
146 // miserably on large source files.
147 //
148 // Note: ContentsEntry could be a named pipe, in which case
149 // ContentsEntry::getSize() could have the wrong size. Use
150 // MemoryBuffer::getBufferSize() instead.
151 if (Buffer->getBufferSize() >= std::numeric_limits<unsigned>::max()) {
152 Diag.Report(Loc, diag::err_file_too_large) << ContentsEntry->getName();
153
154 return std::nullopt;
155 }
156
157 // Unless this is a named pipe (in which case we can handle a mismatch),
158 // check that the file's size is the same as in the file entry (which may
159 // have come from a stat cache).
160 // The buffer will always be larger than the file size on z/OS in the presence
161 // of characters outside the base character set.
// NOTE(review): the assert below tests the negation of the size condition that
// the following 'if' diagnoses. In builds with assertions enabled, a buffer
// smaller than the recorded size therefore aborts instead of reporting
// err_file_modified, and the assert also applies to named pipes, which the
// 'if' exempts. Confirm this is intended.
162 assert(Buffer->getBufferSize() >= (size_t)ContentsEntry->getSize());
163 if (!ContentsEntry->isNamedPipe() &&
164 Buffer->getBufferSize() < (size_t)ContentsEntry->getSize()) {
165 Diag.Report(Loc, diag::err_file_modified) << ContentsEntry->getName();
166
167 return std::nullopt;
168 }
169
170 // If the buffer is valid, check to see if it has a UTF Byte Order Mark
171 // (BOM). We only support UTF-8 with and without a BOM right now. See
172 // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
173 StringRef BufStr = Buffer->getBuffer();
174 const char *InvalidBOM = getInvalidBOM(BufStr);
175
176 if (InvalidBOM) {
177 Diag.Report(Loc, diag::err_unsupported_bom)
178 << InvalidBOM << ContentsEntry->getName();
179 return std::nullopt;
180 }
181
182 // Buffer has been validated.
183 IsBufferInvalid = false;
184 return Buffer->getMemBufferRef();
185}
186
// NOTE(review): the signature line of this definition (embedded listing line
// 187) is absent from this listing; the body uses a parameter called Name.
//
// Looks Name up in FilenameIDs, inserting it with the current size of
// FilenamesByID as its value when it is not present yet. A fresh insertion is
// also appended to FilenamesByID. Returns the value stored for Name.
188 auto IterBool = FilenameIDs.try_emplace(Name, FilenamesByID.size());
189 if (IterBool.second)
190 FilenamesByID.push_back(&*IterBool.first);
191 return IterBool.first->second;
192}
193
194/// Add a line note to the line table that indicates that there is a \#line or
195/// GNU line marker at the specified FID/Offset location which changes the
196/// presumed location to LineNo/FilenameID. If EntryExit is 0, then this doesn't
197/// change the presumed \#include stack. If it is 1, this is a file entry, if
198/// it is 2 then this is a file exit. FileKind specifies whether this is a
199/// system header or extern C system header.
// NOTE(review): the last line of the parameter list (embedded listing line
// 202) is absent from this listing; the body uses a value called FileKind that
// is presumably declared there.
200void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, unsigned LineNo,
201 int FilenameID, unsigned EntryExit,
203 std::vector<LineEntry> &Entries = LineEntries[FID];
204
// Entries for one FID must be appended in strictly increasing offset order.
205 assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
206 "Adding line entries out of order!");
207
208 unsigned IncludeOffset = 0;
209 if (EntryExit == 1) {
210 // Push #include
211 IncludeOffset = Offset-1;
212 } else {
213 const auto *PrevEntry = Entries.empty() ? nullptr : &Entries.back();
214 if (EntryExit == 2) {
215 // Pop #include
216 assert(PrevEntry && PrevEntry->IncludeOffset &&
217 "PPDirectives should have caught case when popping empty include "
218 "stack");
// Continue from the entry found at the previous entry's IncludeOffset; the
// lookup may yield null, which the check below handles.
219 PrevEntry = FindNearestLineEntry(FID, PrevEntry->IncludeOffset);
220 }
221 if (PrevEntry) {
222 IncludeOffset = PrevEntry->IncludeOffset;
223 if (FilenameID == -1) {
224 // An unspecified FilenameID means use the previous (or containing)
225 // filename if available, or the main source file otherwise.
226 FilenameID = PrevEntry->FilenameID;
227 }
228 }
229 }
230
231 Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
232 IncludeOffset));
233}
234
235/// FindNearestLineEntry - Find the line entry nearest to FID that is before
236/// it. If there is no line entry before Offset in FID, return null.
// NOTE(review): the first line of the signature (embedded listing line 237) is
// absent from this listing; only the trailing 'unsigned Offset' parameter is
// visible. The body also uses FID.
238 unsigned Offset) {
239 const std::vector<LineEntry> &Entries = LineEntries[FID];
240 assert(!Entries.empty() && "No #line entries for this FID after all!");
241
242 // It is very common for the query to be after the last #line, check this
243 // first.
244 if (Entries.back().FileOffset <= Offset)
245 return &Entries.back();
246
247 // Do a binary search to find the maximal element that is still before Offset.
248 std::vector<LineEntry>::const_iterator I = llvm::upper_bound(Entries, Offset);
// upper_bound landing on the first element means no entry precedes Offset.
249 if (I == Entries.begin())
250 return nullptr;
251 return &*--I;
252}
253
254/// Add a new line entry that has already been encoded into
255/// the internal representation of the line table.
// NOTE(review): the first line of the signature (embedded listing line 256) is
// absent from this listing; the body uses FID, presumably declared there.
// The assignment below replaces whatever entries were stored for FID.
257 const std::vector<LineEntry> &Entries) {
258 LineEntries[FID] = Entries;
259}
260
261/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
// NOTE(review): the signature line (embedded listing line 262) is absent from
// this listing; the body forwards a value called Name to the line table.
263 return getLineTable().getLineTableFilenameID(Name);
264}
265
266/// AddLineNote - Add a line note to the line table for the FileID and offset
267/// specified by Loc. If FilenameID is -1, it is considered to be
268/// unspecified.
// NOTE(review): embedded listing lines 269, 272, 280 and 283 are absent from
// this listing. That covers the start and end of the parameter list (Loc,
// LineNo and FileKind are used below) and the statements around the
// "#line directives" comment. Restore them before building.
270 int FilenameID, bool IsFileEntry,
271 bool IsFileExit,
273 std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
274
275 bool Invalid = false;
276 SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid);
// Nothing is recorded unless the location resolves to a valid file entry.
277 if (!Entry.isFile() || Invalid)
278 return;
279
281
282 // Remember that this file has #line directives now if it doesn't already.
284
// Called for its side effect only; presumably this ensures LineTable exists
// before it is dereferenced below. Verify against getLineTable().
285 (void) getLineTable();
286
// Encode the two flags as the EntryExit code passed to the line table:
// 1 = file entry, 2 = file exit, 0 = neither. Entry wins if both are set.
287 unsigned EntryExit = 0;
288 if (IsFileEntry)
289 EntryExit = 1;
290 else if (IsFileExit)
291 EntryExit = 2;
292
293 LineTable->AddLineNote(LocInfo.first, LocInfo.second, LineNo, FilenameID,
294 EntryExit, FileKind);
295}
296
// NOTE(review): the signature line (embedded listing line 297) is absent from
// this listing.
//
// Creates the LineTableInfo on first use and returns a reference to it.
298 if (!LineTable)
299 LineTable.reset(new LineTableInfo());
300 return *LineTable;
301}
302
303//===----------------------------------------------------------------------===//
304// Private 'Create' methods.
305//===----------------------------------------------------------------------===//
306
// NOTE(review): embedded listing lines 307 (the start of this constructor's
// signature; Diag and FileMgr are used below) and 310 (a statement between the
// initializer list and the call below) are absent from this listing.
//
// Stores the diagnostics engine, file manager and volatility flag, then
// registers this object with the diagnostics engine.
308 bool UserFilesAreVolatile)
309 : Diag(Diag), FileMgr(FileMgr), UserFilesAreVolatile(UserFilesAreVolatile) {
311 Diag.setSourceManager(this);
312}
313
// NOTE(review): the line opening this definition (embedded listing line 314)
// is absent from this listing.
315 // Delete FileEntry objects corresponding to content caches. Since the actual
316 // content cache objects are bump pointer allocated, we just have to run the
317 // dtors, but we call the deallocate method for completeness.
// What the loops below actually destroy are ContentCache objects: the entries
// of MemBufferInfos, then the mapped values of FileInfos. Null entries are
// skipped.
318 for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
319 if (MemBufferInfos[i]) {
320 MemBufferInfos[i]->~ContentCache();
321 ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
322 }
323 }
324 for (auto I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
325 if (I->second) {
326 I->second->~ContentCache();
327 ContentCacheAlloc.Deallocate(I->second);
328 }
329 }
330}
331
// NOTE(review): embedded listing lines 332 (the signature) and 349 (the final
// statement, presumably the one that performs the "use up FileID #0" step
// described below) are absent from this listing.
//
// Resets the location tables and the lookup caches to their initial state.
333 MainFileID = FileID();
334 LocalSLocEntryTable.clear();
335 LoadedSLocEntryTable.clear();
336 SLocEntryLoaded.clear();
337 SLocEntryOffsetLoaded.clear();
338 LastLineNoFileIDQuery = FileID();
339 LastLineNoContentCache = nullptr;
340 LastFileIDLookup = FileID();
341
342 IncludedLocMap.clear();
// The line table object itself is kept; only its contents are cleared.
343 if (LineTable)
344 LineTable->clear();
345
346 // Use up FileID #0 as an invalid expansion.
347 NextLocalOffset = 0;
348 CurrentLoadedOffset = MaxLoadedOffset;
350}
351
352bool SourceManager::isMainFile(const FileEntry &SourceFile) {
353 assert(MainFileID.isValid() && "expected initialized SourceManager");
354 if (auto *FE = getFileEntryForID(MainFileID))
355 return FE->getUID() == SourceFile.getUID();
356 return false;
357}
358
// NOTE(review): the signature line (embedded listing line 359) is absent from
// this listing; the body reads from an object called Old.
//
// Prepares this (still uninitialized) manager from Old: forces every loaded
// SLocEntry of Old to be read, then clones Old's per-file content caches into
// FileInfos, leaving slots that are already populated untouched.
360 assert(MainFileID.isInvalid() && "expected uninitialized SourceManager");
361
// Copies the fields of Cache into a new ContentCache placed in
// ContentCacheAlloc storage. The clone refers to the original's buffer (if it
// is loaded) through setUnownedBuffer.
362 auto CloneContentCache = [&](const ContentCache *Cache) -> ContentCache * {
363 auto *Clone = new (ContentCacheAlloc.Allocate<ContentCache>()) ContentCache;
364 Clone->OrigEntry = Cache->OrigEntry;
365 Clone->ContentsEntry = Cache->ContentsEntry;
366 Clone->BufferOverridden = Cache->BufferOverridden;
367 Clone->IsFileVolatile = Cache->IsFileVolatile;
368 Clone->IsTransient = Cache->IsTransient;
369 Clone->setUnownedBuffer(Cache->getBufferIfLoaded());
370 return Clone;
371 };
372
373 // Ensure all SLocEntries are loaded from the external source.
374 for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
375 if (!Old.SLocEntryLoaded[I])
376 Old.loadSLocEntry(I, nullptr);
377
378 // Inherit any content cache data from the old source manager.
379 for (auto &FileInfo : Old.FileInfos) {
380 SrcMgr::ContentCache *&Slot = FileInfos[FileInfo.first];
381 if (Slot)
382 continue;
383 Slot = CloneContentCache(FileInfo.second);
384 }
385}
386
387ContentCache &SourceManager::getOrCreateContentCache(FileEntryRef FileEnt,
388 bool isSystemFile) {
389 // Do we already have information about this file?
390 ContentCache *&Entry = FileInfos[FileEnt];
391 if (Entry)
392 return *Entry;
393
394 // Nope, create a new Cache entry.
395 Entry = ContentCacheAlloc.Allocate<ContentCache>();
396
397 if (OverriddenFilesInfo) {
398 // If the file contents are overridden with contents from another file,
399 // pass that file to ContentCache.
400 auto overI = OverriddenFilesInfo->OverriddenFiles.find(FileEnt);
401 if (overI == OverriddenFilesInfo->OverriddenFiles.end())
402 new (Entry) ContentCache(FileEnt);
403 else
404 new (Entry) ContentCache(OverridenFilesKeepOriginalName ? FileEnt
405 : overI->second,
406 overI->second);
407 } else {
408 new (Entry) ContentCache(FileEnt);
409 }
410
411 Entry->IsFileVolatile = UserFilesAreVolatile && !isSystemFile;
412 Entry->IsTransient = FilesAreTransient;
413 Entry->BufferOverridden |= FileEnt.isNamedPipe();
414
415 return *Entry;
416}
417
418/// Create a new ContentCache for the specified memory buffer.
419/// This does no caching.
420ContentCache &SourceManager::createMemBufferContentCache(
421 std::unique_ptr<llvm::MemoryBuffer> Buffer) {
422 // Add a new ContentCache to the MemBufferInfos list and return it.
423 ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>();
424 new (Entry) ContentCache();
425 MemBufferInfos.push_back(Entry);
426 Entry->setBuffer(std::move(Buffer));
427 return *Entry;
428}
429
430const SrcMgr::SLocEntry &SourceManager::loadSLocEntry(unsigned Index,
431 bool *Invalid) const {
432 return const_cast<SourceManager *>(this)->loadSLocEntry(Index, Invalid);
433}
434
435SrcMgr::SLocEntry &SourceManager::loadSLocEntry(unsigned Index, bool *Invalid) {
436 assert(!SLocEntryLoaded[Index]);
437 if (ExternalSLocEntries->ReadSLocEntry(-(static_cast<int>(Index) + 2))) {
438 if (Invalid)
439 *Invalid = true;
440 // If the file of the SLocEntry changed we could still have loaded it.
441 if (!SLocEntryLoaded[Index]) {
442 // Try to recover; create a SLocEntry so the rest of clang can handle it.
443 if (!FakeSLocEntryForRecovery)
444 FakeSLocEntryForRecovery = std::make_unique<SLocEntry>(SLocEntry::get(
445 0, FileInfo::get(SourceLocation(), getFakeContentCacheForRecovery(),
446 SrcMgr::C_User, "")));
447 return *FakeSLocEntryForRecovery;
448 }
449 }
450
451 return LoadedSLocEntryTable[Index];
452}
453
// NOTE(review): the line naming this member function and declaring its first
// parameter (embedded listing line 455) is absent from this listing; the body
// uses NumSLocEntries, presumably declared there.
//
// Reserves NumSLocEntries loaded entries and TotalSize units of offset space
// below CurrentLoadedOffset. Returns the base ID and the new
// CurrentLoadedOffset, or the pair (0, 0) when the space is not available.
454std::pair<int, SourceLocation::UIntTy>
456 SourceLocation::UIntTy TotalSize) {
457 assert(ExternalSLocEntries && "Don't have an external sloc source");
458 // Make sure we're not about to run out of source locations.
// The first test guards the subtraction in the second against wrapping.
459 if (CurrentLoadedOffset < TotalSize ||
460 CurrentLoadedOffset - TotalSize < NextLocalOffset) {
461 return std::make_pair(0, 0);
462 }
463 LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
464 SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
465 SLocEntryOffsetLoaded.resize(LoadedSLocEntryTable.size());
466 CurrentLoadedOffset -= TotalSize;
467 updateSlocUsageStats();
// BaseID is derived from the table size after the resize above.
468 int BaseID = -int(LoadedSLocEntryTable.size()) - 1;
469 LoadedSLocEntryAllocBegin.push_back(FileID::get(BaseID));
470 return std::make_pair(BaseID, CurrentLoadedOffset);
471}
472
473/// As part of recovering from missing or changed content, produce a
474/// fake, non-empty buffer.
475llvm::MemoryBufferRef SourceManager::getFakeBufferForRecovery() const {
476 if (!FakeBufferForRecovery)
477 FakeBufferForRecovery =
478 llvm::MemoryBuffer::getMemBuffer("<<<INVALID BUFFER>>");
479
480 return *FakeBufferForRecovery;
481}
482
483/// As part of recovering from missing or changed content, produce a
484/// fake content cache.
485SrcMgr::ContentCache &SourceManager::getFakeContentCacheForRecovery() const {
486 if (!FakeContentCacheForRecovery) {
487 FakeContentCacheForRecovery = std::make_unique<SrcMgr::ContentCache>();
488 FakeContentCacheForRecovery->setUnownedBuffer(getFakeBufferForRecovery());
489 }
490 return *FakeContentCacheForRecovery;
491}
492
493/// Returns the previous in-order FileID or an invalid FileID if there
494/// is no previous one.
495FileID SourceManager::getPreviousFileID(FileID FID) const {
496 if (FID.isInvalid())
497 return FileID();
498
499 int ID = FID.ID;
500 if (ID == -1)
501 return FileID();
502
503 if (ID > 0) {
504 if (ID-1 == 0)
505 return FileID();
506 } else if (unsigned(-(ID-1) - 2) >= LoadedSLocEntryTable.size()) {
507 return FileID();
508 }
509
510 return FileID::get(ID-1);
511}
512
513/// Returns the next in-order FileID or an invalid FileID if there is
514/// no next one.
515FileID SourceManager::getNextFileID(FileID FID) const {
516 if (FID.isInvalid())
517 return FileID();
518
519 int ID = FID.ID;
520 if (ID > 0) {
521 if (unsigned(ID+1) >= local_sloc_entry_size())
522 return FileID();
523 } else if (ID+1 >= -1) {
524 return FileID();
525 }
526
527 return FileID::get(ID+1);
528}
529
530//===----------------------------------------------------------------------===//
531// Methods to create new FileID's and macro expansions.
532//===----------------------------------------------------------------------===//
533
534/// Create a new FileID that represents the specified file
535/// being \#included from the specified IncludePosition.
// NOTE(review): the first line of the signature (embedded listing line 536) is
// absent from this listing; the body uses SourceFile, presumably declared
// there.
537 SourceLocation IncludePos,
538 SrcMgr::CharacteristicKind FileCharacter,
539 int LoadedID,
540 SourceLocation::UIntTy LoadedOffset) {
541 SrcMgr::ContentCache &IR = getOrCreateContentCache(SourceFile,
542 isSystem(FileCharacter));
543
544 // If this is a named pipe, immediately load the buffer to ensure subsequent
545 // calls to ContentCache::getSize() are accurate.
// The returned buffer is deliberately discarded; only the load matters here.
546 if (IR.ContentsEntry->isNamedPipe())
547 (void)IR.getBufferOrNone(Diag, getFileManager(), SourceLocation());
548
549 return createFileIDImpl(IR, SourceFile.getName(), IncludePos, FileCharacter,
550 LoadedID, LoadedOffset);
551}
552
553/// Create a new FileID that represents the specified memory buffer.
554///
555/// This does no caching of the buffer and takes ownership of the
556/// MemoryBuffer, so only pass a MemoryBuffer to this once.
557FileID SourceManager::createFileID(std::unique_ptr<llvm::MemoryBuffer> Buffer,
558 SrcMgr::CharacteristicKind FileCharacter,
559 int LoadedID,
560 SourceLocation::UIntTy LoadedOffset,
561 SourceLocation IncludeLoc) {
562 StringRef Name = Buffer->getBufferIdentifier();
563 return createFileIDImpl(createMemBufferContentCache(std::move(Buffer)), Name,
564 IncludeLoc, FileCharacter, LoadedID, LoadedOffset);
565}
566
567/// Create a new FileID that represents the specified memory buffer.
568///
569/// This does not take ownership of the MemoryBuffer. The memory buffer must
570/// outlive the SourceManager.
571FileID SourceManager::createFileID(const llvm::MemoryBufferRef &Buffer,
572 SrcMgr::CharacteristicKind FileCharacter,
573 int LoadedID,
574 SourceLocation::UIntTy LoadedOffset,
575 SourceLocation IncludeLoc) {
576 return createFileID(llvm::MemoryBuffer::getMemBuffer(Buffer), FileCharacter,
577 LoadedID, LoadedOffset, IncludeLoc);
578}
579
580/// Get the FileID for \p SourceFile if it exists. Otherwise, create a
581/// new FileID for the \p SourceFile.
// NOTE(review): the line naming this member function and declaring SourceFile
// (embedded listing line 583) is absent from this listing.
582FileID
584 SrcMgr::CharacteristicKind FileCharacter) {
585 FileID ID = translateFile(SourceFile);
// A newly created FileID is given a default-constructed include location.
586 return ID.isValid() ? ID : createFileID(SourceFile, SourceLocation(),
587 FileCharacter);
588}
589
590/// Helper function to determine if an input file requires conversion
591bool needConversion(StringRef Filename) {
592#ifdef __MVS__
593 llvm::ErrorOr<bool> NeedConversion =
594 llvm::needzOSConversion(Filename.str().c_str());
595 return NeedConversion && *NeedConversion;
596#else
597 return false;
598#endif
599}
600
601/// createFileID - Create a new FileID for the specified ContentCache and
602/// include position. This works regardless of whether the ContentCache
603/// corresponds to a file or some other input source.
// A negative LoadedID fills the matching slot of the loaded entry table at
// LoadedOffset. Otherwise a local entry is appended at NextLocalOffset, and an
// invalid FileID is returned (after a diagnostic) when the offset space is
// exhausted.
604FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
605 SourceLocation IncludePos,
606 SrcMgr::CharacteristicKind FileCharacter,
607 int LoadedID,
608 SourceLocation::UIntTy LoadedOffset) {
609 if (LoadedID < 0) {
610 assert(LoadedID != -1 && "Loading sentinel FileID");
// Loaded IDs map to table indices as Index = -LoadedID - 2.
611 unsigned Index = unsigned(-LoadedID) - 2;
612 assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
613 assert(!SLocEntryLoaded[Index] && "FileID already loaded");
614 LoadedSLocEntryTable[Index] = SLocEntry::get(
615 LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
616 SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
617 return FileID::get(LoadedID);
618 }
619 unsigned FileSize = File.getSize();
620 bool NeedConversion = needConversion(Filename);
621 if (NeedConversion) {
622 // Buffer size may increase due to potential z/OS EBCDIC to UTF-8
623 // conversion.
624 if (std::optional<llvm::MemoryBufferRef> Buffer =
625 File.getBufferOrNone(Diag, getFileManager())) {
626 unsigned BufSize = Buffer->getBufferSize();
627 if (BufSize > FileSize) {
628 if (File.ContentsEntry.has_value())
629 File.ContentsEntry->updateFileEntryBufferSize(BufSize);
630 FileSize = BufSize;
631 }
632 }
633 }
// Reject the file if reserving FileSize + 1 offsets would wrap around or run
// past CurrentLoadedOffset.
634 if (!(NextLocalOffset + FileSize + 1 > NextLocalOffset &&
635 NextLocalOffset + FileSize + 1 <= CurrentLoadedOffset)) {
636 Diag.Report(IncludePos, diag::err_sloc_space_too_large);
// NOTE(review): embedded listing line 637 (a statement between the diagnostic
// above and the return below) is absent from this listing.
638 return FileID();
639 }
640 LocalSLocEntryTable.push_back(
641 SLocEntry::get(NextLocalOffset,
642 FileInfo::get(IncludePos, File, FileCharacter, Filename)));
643 // We do a +1 here because we want a SourceLocation that means "the end of the
644 // file", e.g. for the "no newline at the end of the file" diagnostic.
645 NextLocalOffset += FileSize + 1;
646 updateSlocUsageStats();
647
648 // Set LastFileIDLookup to the newly created file. The next getFileID call is
649 // almost guaranteed to be from that file.
650 FileID FID = FileID::get(LocalSLocEntryTable.size()-1);
651 return LastFileIDLookup = FID;
652}
653
// NOTE(review): embedded listing lines 654 (the start of the signature) and
// 656 (the start of the statement that produces Info; only its trailing
// 'ExpansionLoc);' argument is visible) are absent from this listing.
//
// Hands the constructed Info and Length to createExpansionLocImpl.
655 SourceLocation SpellingLoc, SourceLocation ExpansionLoc, unsigned Length) {
657 ExpansionLoc);
658 return createExpansionLocImpl(Info, Length);
659}
660
// NOTE(review): embedded listing lines 661 (the start of the signature) and
// 666 (the start of the statement that produces Info; only its argument list
// is visible) are absent from this listing.
//
// Hands the constructed Info, Length and the loaded ID/offset pair to
// createExpansionLocImpl.
662 SourceLocation SpellingLoc, SourceLocation ExpansionLocStart,
663 SourceLocation ExpansionLocEnd, unsigned Length,
664 bool ExpansionIsTokenRange, int LoadedID,
665 SourceLocation::UIntTy LoadedOffset) {
667 SpellingLoc, ExpansionLocStart, ExpansionLocEnd, ExpansionIsTokenRange);
668 return createExpansionLocImpl(Info, Length, LoadedID, LoadedOffset);
669}
670
// NOTE(review): the first line of the signature (embedded listing line 671) is
// absent from this listing; the body uses Spelling, presumably declared there.
//
// Creates an expansion location for a split token. The length passed on is the
// offset distance between TokenStart and TokenEnd, which must lie in the same
// file.
672 SourceLocation TokenStart,
673 SourceLocation TokenEnd) {
674 assert(getFileID(TokenStart) == getFileID(TokenEnd) &&
675 "token spans multiple files");
676 return createExpansionLocImpl(
677 ExpansionInfo::createForTokenSplit(Spelling, TokenStart, TokenEnd),
678 TokenEnd.getOffset() - TokenStart.getOffset());
679}
680
// NOTE(review): the line preceding the qualified name (embedded listing line
// 681, presumably the return type) is absent from this listing.
//
// A negative LoadedID fills the matching slot of the loaded entry table and
// returns a macro location at LoadedOffset. Otherwise a local entry is
// appended and a macro location at the previous NextLocalOffset is returned.
682SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
683 unsigned Length, int LoadedID,
684 SourceLocation::UIntTy LoadedOffset) {
685 if (LoadedID < 0) {
686 assert(LoadedID != -1 && "Loading sentinel FileID");
// Loaded IDs map to table indices as Index = -LoadedID - 2.
687 unsigned Index = unsigned(-LoadedID) - 2;
688 assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
689 assert(!SLocEntryLoaded[Index] && "FileID already loaded");
690 LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
691 SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
692 return SourceLocation::getMacroLoc(LoadedOffset);
693 }
// The entry is appended before the space check below; on failure the process
// is terminated through report_fatal_error.
694 LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
695 if (NextLocalOffset + Length + 1 <= NextLocalOffset ||
696 NextLocalOffset + Length + 1 > CurrentLoadedOffset) {
697 Diag.Report(diag::err_sloc_space_too_large);
698 // FIXME: call `noteSLocAddressSpaceUsage` to report details to users and
699 // use a source location from `Info` to point at an error.
700 // Currently, both cause Clang to run indefinitely, this needs to be fixed.
701 // FIXME: return an error instead of crashing. Returning invalid source
702 // locations causes compiler to run indefinitely.
703 llvm::report_fatal_error("ran out of source locations");
704 }
705 // See createFileID for that +1.
706 NextLocalOffset += Length + 1;
707 updateSlocUsageStats();
// Subtracting the amount just added recovers the start offset of the new entry.
708 return SourceLocation::getMacroLoc(NextLocalOffset - (Length + 1));
709}
710
// NOTE(review): the line naming this member function and declaring its
// parameter (embedded listing line 712) is absent from this listing; the body
// uses a value called File.
//
// Looks up or creates the content cache for File and returns its buffer, if
// one can be obtained. A default-constructed location is passed to the load.
711std::optional<llvm::MemoryBufferRef>
713 SrcMgr::ContentCache &IR = getOrCreateContentCache(File);
714 return IR.getBufferOrNone(Diag, getFileManager(), SourceLocation());
715}
716
// NOTE(review): the first line of the signature (embedded listing line 717) is
// absent from this listing.
//
// Replaces the contents of SourceFile with Buffer: the file's content cache
// takes the buffer, is flagged as overridden, and the file is recorded in
// OverriddenFilesWithBuffer.
718 FileEntryRef SourceFile, std::unique_ptr<llvm::MemoryBuffer> Buffer) {
719 SrcMgr::ContentCache &IR = getOrCreateContentCache(SourceFile);
720
721 IR.setBuffer(std::move(Buffer));
722 IR.BufferOverridden = true;
723
724 getOverriddenFilesInfo().OverriddenFilesWithBuffer.insert(SourceFile);
725}
726
// NOTE(review): the first line of the signature (embedded listing line 727) is
// absent from this listing; the body uses SourceFile with '->', so it is
// presumably a pointer-like parameter declared there.
//
// Records that the contents of SourceFile come from NewFile. The two files
// must report the same size, and no content cache may exist for SourceFile
// yet. An existing mapping for SourceFile is replaced.
728 FileEntryRef NewFile) {
729 assert(SourceFile->getSize() == NewFile.getSize() &&
730 "Different sizes, use the FileManager to create a virtual file with "
731 "the correct size");
732 assert(FileInfos.find_as(SourceFile) == FileInfos.end() &&
733 "This function should be called at the initialization stage, before "
734 "any parsing occurs.");
735 // FileEntryRef is not default-constructible.
// Hence insert-then-assign rather than operator[] on the map.
736 auto Pair = getOverriddenFilesInfo().OverriddenFiles.insert(
737 std::make_pair(SourceFile, NewFile));
738 if (!Pair.second)
739 Pair.first->second = NewFile;
740}
741
// NOTE(review): the signature of this definition (embedded listing lines 742
// and 743) is absent from this listing; the body uses a value called File.
//
// For an overridden File, asks the file manager for the bypass file and makes
// sure a content cache exists for it. Returns that file, or std::nullopt when
// the file manager cannot supply one.
744 assert(isFileOverridden(&File.getFileEntry()));
745 OptionalFileEntryRef BypassFile = FileMgr.getBypassFile(File);
746
747 // If the file can't be found in the FS, give up.
748 if (!BypassFile)
749 return std::nullopt;
750
// Called only for its side effect of creating the cache entry.
751 (void)getOrCreateContentCache(*BypassFile);
752 return BypassFile;
753}
754
// NOTE(review): the signature line (embedded listing line 755) is absent from
// this listing; the body uses a value called File.
//
// Marks the content cache of File as transient, creating the cache if needed.
756 getOrCreateContentCache(File).IsTransient = true;
757}
758
// NOTE(review): the line naming this member function and declaring its
// parameter (embedded listing line 760) is absent from this listing; the body
// uses a value called FID.
//
// Returns the recorded file name for FID when FID has a file entry whose
// content cache has an OrigEntry; otherwise returns std::nullopt.
759std::optional<StringRef>
761 if (const SrcMgr::SLocEntry *Entry = getSLocEntryForFile(FID))
762 if (Entry->getFile().getContentCache().OrigEntry)
763 return Entry->getFile().getName();
764 return std::nullopt;
765}
766
767StringRef SourceManager::getBufferData(FileID FID, bool *Invalid) const {
768 auto B = getBufferDataOrNone(FID);
769 if (Invalid)
770 *Invalid = !B;
771 return B ? *B : "<<<<<INVALID SOURCE LOCATION>>>>>";
772}
773
// NOTE(review): the line naming this member function and declaring its
// parameter (embedded listing line 775) is absent from this listing; the body
// uses a value called FID.
//
// Returns whatever getBufferDataIfLoaded() of FID's content cache yields, or
// std::nullopt when FID has no file entry.
774std::optional<StringRef>
776 if (const SrcMgr::SLocEntry *Entry = getSLocEntryForFile(FID))
777 return Entry->getFile().getContentCache().getBufferDataIfLoaded();
778 return std::nullopt;
779}
780
781std::optional<StringRef> SourceManager::getBufferDataOrNone(FileID FID) const {
782 if (const SrcMgr::SLocEntry *Entry = getSLocEntryForFile(FID))
783 if (auto B = Entry->getFile().getContentCache().getBufferOrNone(
784 Diag, getFileManager(), SourceLocation()))
785 return B->getBuffer();
786 return std::nullopt;
787}
788
789//===----------------------------------------------------------------------===//
790// SourceLocation manipulation methods.
791//===----------------------------------------------------------------------===//
792
793/// Return the FileID for a SourceLocation.
794///
795/// This is the cache-miss path of getFileID. Not as hot as that function, but
796/// still very important. It is responsible for finding the entry in the
797/// SLocEntry tables that contains the specified location.
798FileID SourceManager::getFileIDSlow(SourceLocation::UIntTy SLocOffset) const {
799 if (!SLocOffset)
800 return FileID::get(0);
801
802 // Now it is time to search for the correct file. See where the SLocOffset
803 // sits in the global view and consult local or loaded buffers for it.
804 if (SLocOffset < NextLocalOffset)
805 return getFileIDLocal(SLocOffset);
806 return getFileIDLoaded(SLocOffset);
807}
808
809/// Return the FileID for a SourceLocation with a low offset.
810///
811/// This function knows that the SourceLocation is in a local buffer, not a
812/// loaded one.
// The search has two phases: up to 8 backwards linear probes from the upper
// bound, then a binary search between LessIndex and GreaterIndex. Either phase
// stores its result in LastFileIDLookup and updates a probe counter.
813FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const {
814 assert(SLocOffset < NextLocalOffset && "Bad function choice");
815
816 // After the first and second level caches, I see two common sorts of
817 // behavior: 1) a lot of searched FileID's are "near" the cached file
818 // location or are "near" the cached expansion location. 2) others are just
819 // completely random and may be a very long way away.
820 //
821 // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
822 // then we fall back to a less cache efficient, but more scalable, binary
823 // search to find the location.
824
825 // See if this is near the file point - worst case we start scanning from the
826 // most newly created FileID.
827
828 // LessIndex - This is the lower bound of the range that we're searching.
829 // We know that the offset corresponding to the FileID is less than
830 // SLocOffset.
831 unsigned LessIndex = 0;
832 // upper bound of the search range.
833 unsigned GreaterIndex = LocalSLocEntryTable.size();
834 if (LastFileIDLookup.ID >= 0) {
835 // Use the LastFileIDLookup to prune the search space.
836 if (LocalSLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset)
837 LessIndex = LastFileIDLookup.ID;
838 else
839 GreaterIndex = LastFileIDLookup.ID;
840 }
841
842 // Find the FileID that contains this.
843 unsigned NumProbes = 0;
844 while (true) {
// GreaterIndex is exclusive on entry, so step down before inspecting.
845 --GreaterIndex;
846 assert(GreaterIndex < LocalSLocEntryTable.size());
847 if (LocalSLocEntryTable[GreaterIndex].getOffset() <= SLocOffset) {
848 FileID Res = FileID::get(int(GreaterIndex));
849 // Remember it. We have good locality across FileID lookups.
850 LastFileIDLookup = Res;
851 NumLinearScans += NumProbes+1;
852 return Res;
853 }
854 if (++NumProbes == 8)
855 break;
856 }
857
// Linear phase gave up; GreaterIndex now names an entry whose offset is known
// to be greater than SLocOffset.
858 NumProbes = 0;
859 while (true) {
860 unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
861 SourceLocation::UIntTy MidOffset =
862 getLocalSLocEntry(MiddleIndex).getOffset();
863
864 ++NumProbes;
865
866 // If the offset of the midpoint is too large, chop the high side of the
867 // range to the midpoint.
868 if (MidOffset > SLocOffset) {
869 GreaterIndex = MiddleIndex;
870 continue;
871 }
872
873 // If the middle index contains the value, succeed and return.
// "Contains" means it is the last entry, or the next entry starts past
// SLocOffset.
874 if (MiddleIndex + 1 == LocalSLocEntryTable.size() ||
875 SLocOffset < getLocalSLocEntry(MiddleIndex + 1).getOffset()) {
876 FileID Res = FileID::get(MiddleIndex);
877
878 // Remember it. We have good locality across FileID lookups.
879 LastFileIDLookup = Res;
880 NumBinaryProbes += NumProbes;
881 return Res;
882 }
883
884 // Otherwise, move the low-side up to the middle index.
885 LessIndex = MiddleIndex;
886 }
887}
888
889/// Return the FileID for a SourceLocation with a high offset.
890///
891/// This function knows that the SourceLocation is in a loaded buffer, not a
892/// local one.
893FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
894 if (SLocOffset < CurrentLoadedOffset) {
895 assert(0 && "Invalid SLocOffset or bad function choice");
896 return FileID();
897 }
898
899 return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset));
900}
901
902SourceLocation SourceManager::
903getExpansionLocSlowCase(SourceLocation Loc) const {
904 do {
905 // Note: If Loc indicates an offset into a token that came from a macro
906 // expansion (e.g. the 5th character of the token) we do not want to add
907 // this offset when going to the expansion location. The expansion
908 // location is the macro invocation, which the offset has nothing to do
909 // with. This is unlike when we get the spelling loc, because the offset
910 // directly corresponds to the token whose spelling we're inspecting.
912 } while (!Loc.isFileID());
913
914 return Loc;
915}
916
917SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
918 do {
919 std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
920 Loc = getSLocEntry(LocInfo.first).getExpansion().getSpellingLoc();
921 Loc = Loc.getLocWithOffset(LocInfo.second);
922 } while (!Loc.isFileID());
923 return Loc;
924}
925
926SourceLocation SourceManager::getFileLocSlowCase(SourceLocation Loc) const {
927 do {
930 else
932 } while (!Loc.isFileID());
933 return Loc;
934}
935
936
937std::pair<FileID, unsigned>
938SourceManager::getDecomposedExpansionLocSlowCase(
939 const SrcMgr::SLocEntry *E) const {
940 // If this is an expansion record, walk through all the expansion points.
941 FileID FID;
943 unsigned Offset;
944 do {
945 Loc = E->getExpansion().getExpansionLocStart();
946
947 FID = getFileID(Loc);
948 E = &getSLocEntry(FID);
949 Offset = Loc.getOffset()-E->getOffset();
950 } while (!Loc.isFileID());
951
952 return std::make_pair(FID, Offset);
953}
954
955std::pair<FileID, unsigned>
956SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
957 unsigned Offset) const {
958 // If this is an expansion record, walk through all the expansion points.
959 FileID FID;
961 do {
962 Loc = E->getExpansion().getSpellingLoc();
963 Loc = Loc.getLocWithOffset(Offset);
964
965 FID = getFileID(Loc);
966 E = &getSLocEntry(FID);
967 Offset = Loc.getOffset()-E->getOffset();
968 } while (!Loc.isFileID());
969
970 return std::make_pair(FID, Offset);
971}
972
973/// getImmediateSpellingLoc - Given a SourceLocation object, return the
974/// spelling location referenced by the ID. This is the first level down
975/// towards the place where the characters that make up the lexed token can be
976/// found. This should not generally be used by clients.
978 if (Loc.isFileID()) return Loc;
979 std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
980 Loc = getSLocEntry(LocInfo.first).getExpansion().getSpellingLoc();
981 return Loc.getLocWithOffset(LocInfo.second);
982}
983
984/// Return the filename of the file containing a SourceLocation.
985StringRef SourceManager::getFilename(SourceLocation SpellingLoc) const {
987 return F->getName();
988 return StringRef();
989}
990
991/// getImmediateExpansionRange - Loc is required to be an expansion location.
992/// Return the start/end of the expansion information.
995 assert(Loc.isMacroID() && "Not a macro expansion loc!");
996 const ExpansionInfo &Expansion = getSLocEntry(getFileID(Loc)).getExpansion();
997 return Expansion.getExpansionLocRange();
998}
999
1001 while (isMacroArgExpansion(Loc))
1003 return Loc;
1004}
1005
1006/// getExpansionRange - Given a SourceLocation object, return the range of
1007/// tokens covered by the expansion in the ultimate file.
1009 if (Loc.isFileID())
1010 return CharSourceRange(SourceRange(Loc, Loc), true);
1011
1013
1014 // Fully resolve the start and end locations to their ultimate expansion
1015 // points.
1016 while (!Res.getBegin().isFileID())
1018 while (!Res.getEnd().isFileID()) {
1020 Res.setEnd(EndRange.getEnd());
1021 Res.setTokenRange(EndRange.isTokenRange());
1022 }
1023 return Res;
1024}
1025
1027 SourceLocation *StartLoc) const {
1028 if (!Loc.isMacroID()) return false;
1029
1030 FileID FID = getFileID(Loc);
1031 const SrcMgr::ExpansionInfo &Expansion = getSLocEntry(FID).getExpansion();
1032 if (!Expansion.isMacroArgExpansion()) return false;
1033
1034 if (StartLoc)
1035 *StartLoc = Expansion.getExpansionLocStart();
1036 return true;
1037}
1038
1040 if (!Loc.isMacroID()) return false;
1041
1042 FileID FID = getFileID(Loc);
1043 const SrcMgr::ExpansionInfo &Expansion = getSLocEntry(FID).getExpansion();
1044 return Expansion.isMacroBodyExpansion();
1045}
1046
1048 SourceLocation *MacroBegin) const {
1049 assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc");
1050
1051 std::pair<FileID, unsigned> DecompLoc = getDecomposedLoc(Loc);
1052 if (DecompLoc.second > 0)
1053 return false; // Does not point at the start of expansion range.
1054
1055 bool Invalid = false;
1056 const SrcMgr::ExpansionInfo &ExpInfo =
1057 getSLocEntry(DecompLoc.first, &Invalid).getExpansion();
1058 if (Invalid)
1059 return false;
1060 SourceLocation ExpLoc = ExpInfo.getExpansionLocStart();
1061
1062 if (ExpInfo.isMacroArgExpansion()) {
1063 // For macro argument expansions, check if the previous FileID is part of
1064 // the same argument expansion, in which case this Loc is not at the
1065 // beginning of the expansion.
1066 FileID PrevFID = getPreviousFileID(DecompLoc.first);
1067 if (!PrevFID.isInvalid()) {
1068 const SrcMgr::SLocEntry &PrevEntry = getSLocEntry(PrevFID, &Invalid);
1069 if (Invalid)
1070 return false;
1071 if (PrevEntry.isExpansion() &&
1072 PrevEntry.getExpansion().getExpansionLocStart() == ExpLoc)
1073 return false;
1074 }
1075 }
1076
1077 if (MacroBegin)
1078 *MacroBegin = ExpLoc;
1079 return true;
1080}
1081
1083 SourceLocation *MacroEnd) const {
1084 assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc");
1085
1086 FileID FID = getFileID(Loc);
1087 SourceLocation NextLoc = Loc.getLocWithOffset(1);
1088 if (isInFileID(NextLoc, FID))
1089 return false; // Does not point at the end of expansion range.
1090
1091 bool Invalid = false;
1092 const SrcMgr::ExpansionInfo &ExpInfo =
1094 if (Invalid)
1095 return false;
1096
1097 if (ExpInfo.isMacroArgExpansion()) {
1098 // For macro argument expansions, check if the next FileID is part of the
1099 // same argument expansion, in which case this Loc is not at the end of the
1100 // expansion.
1101 FileID NextFID = getNextFileID(FID);
1102 if (!NextFID.isInvalid()) {
1103 const SrcMgr::SLocEntry &NextEntry = getSLocEntry(NextFID, &Invalid);
1104 if (Invalid)
1105 return false;
1106 if (NextEntry.isExpansion() &&
1107 NextEntry.getExpansion().getExpansionLocStart() ==
1108 ExpInfo.getExpansionLocStart())
1109 return false;
1110 }
1111 }
1112
1113 if (MacroEnd)
1114 *MacroEnd = ExpInfo.getExpansionLocEnd();
1115 return true;
1116}
1117
1118//===----------------------------------------------------------------------===//
1119// Queries about the code at a SourceLocation.
1120//===----------------------------------------------------------------------===//
1121
1122/// getCharacterData - Return a pointer to the start of the specified location
1123/// in the appropriate MemoryBuffer.
1125 bool *Invalid) const {
1126 // Note that this is a hot function in the getSpelling() path, which is
1127 // heavily used by -E mode.
1128 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
1129
1130 // Note that calling 'getBuffer()' may lazily page in a source file.
1131 bool CharDataInvalid = false;
1132 const SLocEntry &Entry = getSLocEntry(LocInfo.first, &CharDataInvalid);
1133 if (CharDataInvalid || !Entry.isFile()) {
1134 if (Invalid)
1135 *Invalid = true;
1136
1137 return "<<<<INVALID BUFFER>>>>";
1138 }
1139 std::optional<llvm::MemoryBufferRef> Buffer =
1141 SourceLocation());
1142 if (Invalid)
1143 *Invalid = !Buffer;
1144 return Buffer ? Buffer->getBufferStart() + LocInfo.second
1145 : "<<<<INVALID BUFFER>>>>";
1146}
1147
1148/// getColumnNumber - Return the column # for the specified file position.
1149/// this is significantly cheaper to compute than the line number.
1150unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
1151 bool *Invalid) const {
1152 std::optional<llvm::MemoryBufferRef> MemBuf = getBufferOrNone(FID);
1153 if (Invalid)
1154 *Invalid = !MemBuf;
1155
1156 if (!MemBuf)
1157 return 1;
1158
1159 // It is okay to request a position just past the end of the buffer.
1160 if (FilePos > MemBuf->getBufferSize()) {
1161 if (Invalid)
1162 *Invalid = true;
1163 return 1;
1164 }
1165
1166 const char *Buf = MemBuf->getBufferStart();
1167 // See if we just calculated the line number for this FilePos and can use
1168 // that to lookup the start of the line instead of searching for it.
1169 if (LastLineNoFileIDQuery == FID && LastLineNoContentCache->SourceLineCache &&
1170 LastLineNoResult < LastLineNoContentCache->SourceLineCache.size()) {
1171 const unsigned *SourceLineCache =
1172 LastLineNoContentCache->SourceLineCache.begin();
1173 unsigned LineStart = SourceLineCache[LastLineNoResult - 1];
1174 unsigned LineEnd = SourceLineCache[LastLineNoResult];
1175 if (FilePos >= LineStart && FilePos < LineEnd) {
1176 // LineEnd is the LineStart of the next line.
1177 // A line ends with separator LF or CR+LF on Windows.
1178 // FilePos might point to the last separator,
1179 // but we need a column number at most 1 + the last column.
1180 if (FilePos + 1 == LineEnd && FilePos > LineStart) {
1181 if (Buf[FilePos - 1] == '\r' || Buf[FilePos - 1] == '\n')
1182 --FilePos;
1183 }
1184 return FilePos - LineStart + 1;
1185 }
1186 }
1187
1188 unsigned LineStart = FilePos;
1189 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
1190 --LineStart;
1191 return FilePos-LineStart+1;
1192}
1193
// isInvalid - Report whether Loc.isInvalid() holds. When the Invalid
// out-parameter is non-null, the same answer is also stored through it.
template <typename LocType>
static bool isInvalid(LocType Loc, bool *Invalid) {
  const bool LocIsBad = Loc.isInvalid();
  if (Invalid != nullptr)
    *Invalid = LocIsBad;
  return LocIsBad;
}
1203
1205 bool *Invalid) const {
1206 if (isInvalid(Loc, Invalid)) return 0;
1207 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
1208 return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
1209}
1210
1212 bool *Invalid) const {
1213 if (isInvalid(Loc, Invalid)) return 0;
1214 std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
1215 return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
1216}
1217
1219 bool *Invalid) const {
1221 if (isInvalid(PLoc, Invalid)) return 0;
1222 return PLoc.getColumn();
1223}
1224
// Check whether the multi-byte word x contains any byte in the inclusive range
// [m, n]. The test is approximate: bytes equal to n + 1 may also be reported.
// The result has 0x80 set in every byte position that matched and is zero
// everywhere else.
// See http://graphics.stanford.edu/~seander/bithacks.html#HasBetweenInWord
template <class T>
static constexpr inline T likelyhasbetween(T x, unsigned char m,
                                           unsigned char n) {
  // The byte value 0x01 replicated into every byte of T.
  const T EveryByteOne = ~static_cast<T>(0) / 255;
  const T AboveUpper = x - EveryByteOne * (n + 1);
  const T AtLeastLower =
      (x & EveryByteOne * 127) + (EveryByteOne * (127 - (m - 1)));
  return (AboveUpper & ~x & AtLeastLower) & EveryByteOne * 128;
}
1237
// Build the table of physical line-start offsets for Buffer.
//
// Offset 0 is always recorded as the start of line 1. Each '\n', each lone
// '\r', and each "\r\n" pair ends a line; the offset of the byte that follows
// it is recorded as the start of the next line. The collected offsets are
// passed, together with Alloc, to the LineOffsetMapping constructor.
1238LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer,
1239 llvm::BumpPtrAllocator &Alloc) {
1240
1241 // Find the file offsets of all of the *physical* source lines. This does
1242 // not look at trigraphs, escaped newlines, or anything else tricky.
1243 SmallVector<unsigned, 256> LineOffsets;
1244
1245 // Line #1 starts at char 0.
1246 LineOffsets.push_back(0);
1247
1248 const unsigned char *Start = (const unsigned char *)Buffer.getBufferStart();
1249 const unsigned char *End = (const unsigned char *)Buffer.getBufferEnd();
1250 const unsigned char *Buf = Start;
1251
1252 uint64_t Word;
1253
1254 // Scan sizeof(Word) bytes at a time for new lines.
1255 // This is much faster than scanning each byte independently.
// The word loop is only entered when the buffer holds more than sizeof(Word)
// bytes; shorter buffers are handled entirely by the byte-wise loop below.
1256 if ((unsigned long)(End - Start) > sizeof(Word)) {
1257 do {
// Read little-endian so that byte k of Word is Buf[k].
1258 Word = llvm::support::endian::read64(Buf, llvm::endianness::little);
1259 // no new line => jump over sizeof(Word) bytes.
1260 auto Mask = likelyhasbetween(Word, '\n', '\r');
1261 if (!Mask) {
1262 Buf += sizeof(Word);
1263 continue;
1264 }
1265
1266 // At that point, Mask contains 0x80 set at each byte that holds a value
1267 // in [\n, \r + 1 [
1268
1269 // Scan for the next newline - it's very likely there's one.
// N is the bit offset of the lowest flagged byte, so N / 8 is its index.
1270 unsigned N = llvm::countr_zero(Mask) - 7; // -7 because 0x80 is the marker
1271 Word >>= N;
// Advance Buf to the byte just after the flagged one.
1272 Buf += N / 8 + 1;
1273 unsigned char Byte = Word;
// Flagged bytes that are neither '\r' nor '\n' match no case below and record
// nothing; scanning simply resumes after them.
1274 switch (Byte) {
1275 case '\r':
1276 // If this is \r\n, skip both characters.
1277 if (*Buf == '\n') {
1278 ++Buf;
1279 }
1280 [[fallthrough]];
1281 case '\n':
// Buf already points past the line terminator, i.e. at the next line start.
1282 LineOffsets.push_back(Buf - Start);
1283 };
// Keep going only while at least sizeof(Word) + 2 bytes remain.
1284 } while (Buf < End - sizeof(Word) - 1);
1285 }
1286
1287 // Handle tail using a regular check.
1288 while (Buf < End) {
1289 if (*Buf == '\n') {
1290 LineOffsets.push_back(Buf - Start + 1);
1291 } else if (*Buf == '\r') {
1292 // If this is \r\n, skip both characters.
1293 if (Buf + 1 < End && Buf[1] == '\n') {
1294 ++Buf;
1295 }
1296 LineOffsets.push_back(Buf - Start + 1);
1297 }
1298 ++Buf;
1299 }
1300
1301 return LineOffsetMapping(LineOffsets, Alloc);
1302}
1303
1305 llvm::BumpPtrAllocator &Alloc)
1306 : Storage(Alloc.Allocate<unsigned>(LineOffsets.size() + 1)) {
1307 Storage[0] = LineOffsets.size();
1308 std::copy(LineOffsets.begin(), LineOffsets.end(), Storage + 1);
1309}
1310
1311/// getLineNumber - Given a FileID and a byte offset within it, return the
1312/// line number that the offset falls on. This requires building and caching a
1313/// table of line offsets for the MemoryBuffer, so this is not cheap: use only
1314/// when about to emit a diagnostic.
///
/// \param FID      the file to query.
/// \param FilePos  byte offset into that file's buffer.
/// \param Invalid  if non-null, set to true when \p FID is invalid, its entry
///                 cannot be obtained or is not a file, or its buffer is
///                 unavailable; set to false otherwise.
/// \returns the line number, or 1 on any of the failures above.
///
/// The query and its result are stored in the LastLineNo* members so that the
/// next query against the same file can search a narrower range.
1315unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos,
1316 bool *Invalid) const {
1317 if (FID.isInvalid()) {
1318 if (Invalid)
1319 *Invalid = true;
1320 return 1;
1321 }
1322
// Reuse the ContentCache remembered from the previous query when it was for
// the same file; otherwise look it up through the file's SLocEntry.
1323 const ContentCache *Content;
1324 if (LastLineNoFileIDQuery == FID)
1325 Content = LastLineNoContentCache;
1326 else {
1327 bool MyInvalid = false;
1328 const SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
1329 if (MyInvalid || !Entry.isFile()) {
1330 if (Invalid)
1331 *Invalid = true;
1332 return 1;
1333 }
1334
1335 Content = &Entry.getFile().getContentCache();
1336 }
1337
1338 // If this is the first use of line information for this buffer, compute the
1339 // SourceLineCache for it on demand.
1340 if (!Content->SourceLineCache) {
1341 std::optional<llvm::MemoryBufferRef> Buffer =
1342 Content->getBufferOrNone(Diag, getFileManager(), SourceLocation());
1343 if (Invalid)
1344 *Invalid = !Buffer;
1345 if (!Buffer)
1346 return 1;
1347
1348 Content->SourceLineCache =
1349 LineOffsetMapping::get(*Buffer, ContentCacheAlloc);
1350 } else if (Invalid)
1351 *Invalid = false;
1352
1353 // Okay, we know we have a line number table. Do a binary search to find the
1354 // line number that this character position lands on.
1355 const unsigned *SourceLineCache = Content->SourceLineCache.begin();
1356 const unsigned *SourceLineCacheStart = SourceLineCache;
1357 const unsigned *SourceLineCacheEnd = Content->SourceLineCache.end();
1358
// Search for FilePos + 1: lower_bound then lands just past the last line start
// that is <= FilePos, and that index (counted from the table start) is the
// line number.
1359 unsigned QueriedFilePos = FilePos+1;
1360
1361 // FIXME: I would like to be convinced that this code is worth being as
1362 // complicated as it is, binary search isn't that slow.
1363 //
1364 // If it is worth being optimized, then in my opinion it could be more
1365 // performant, simpler, and more obviously correct by just "galloping" outward
1366 // from the queried file position. In fact, this could be incorporated into a
1367 // generic algorithm such as lower_bound_with_hint.
1368 //
1369 // If someone gives me a test case where this matters, I will do it! - DWD
1370
1371 // If the previous query was to the same file, we know both the file pos from
1372 // that query and the line number returned. This allows us to narrow the
1373 // search space from the entire file to something near the match.
1374 if (LastLineNoFileIDQuery == FID) {
1375 if (QueriedFilePos >= LastLineNoFilePos) {
// The query is at or after the previous one, so begin the search at the
// previous result's line-start entry.
1376 // FIXME: Potential overflow?
1377 SourceLineCache = SourceLineCache+LastLineNoResult-1;
1378
1379 // The query is likely to be nearby the previous one. Here we check to
1380 // see if it is within 5, 10 or 20 lines. It can be far away in cases
1381 // where big comment blocks and vertical whitespace eat up lines but
1382 // contribute no tokens.
1383 if (SourceLineCache+5 < SourceLineCacheEnd) {
1384 if (SourceLineCache[5] > QueriedFilePos)
1385 SourceLineCacheEnd = SourceLineCache+5;
1386 else if (SourceLineCache+10 < SourceLineCacheEnd) {
1387 if (SourceLineCache[10] > QueriedFilePos)
1388 SourceLineCacheEnd = SourceLineCache+10;
1389 else if (SourceLineCache+20 < SourceLineCacheEnd) {
1390 if (SourceLineCache[20] > QueriedFilePos)
1391 SourceLineCacheEnd = SourceLineCache+20;
1392 }
1393 }
1394 }
1395 } else {
// The query is before the previous one: cap the upper end of the search just
// past the previous result, provided that index is inside the table.
1396 if (LastLineNoResult < Content->SourceLineCache.size())
1397 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
1398 }
1399 }
1400
1401 const unsigned *Pos =
1402 std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
// The index is measured from the start of the whole table, not from the
// (possibly narrowed) search start.
1403 unsigned LineNo = Pos-SourceLineCacheStart;
1404
// Remember this query so the next one on the same file can use it as a hint.
1405 LastLineNoFileIDQuery = FID;
1406 LastLineNoContentCache = Content;
1407 LastLineNoFilePos = QueriedFilePos;
1408 LastLineNoResult = LineNo;
1409 return LineNo;
1410}
1411
1413 bool *Invalid) const {
1414 if (isInvalid(Loc, Invalid)) return 0;
1415 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
1416 return getLineNumber(LocInfo.first, LocInfo.second);
1417}
1419 bool *Invalid) const {
1420 if (isInvalid(Loc, Invalid)) return 0;
1421 std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
1422 return getLineNumber(LocInfo.first, LocInfo.second);
1423}
1425 bool *Invalid) const {
1426 PresumedLoc PLoc = getPresumedLoc(Loc);
1427 if (isInvalid(PLoc, Invalid)) return 0;
1428 return PLoc.getLine();
1429}
1430
1431/// getFileCharacteristic - return the file characteristic of the specified
1432/// source location, indicating whether this is a normal file, a system
1433/// header, or an "implicit extern C" system header.
1434///
1435/// This state can be modified with flags on GNU linemarker directives like:
1436/// # 4 "foo.h" 3
1437/// which changes all source locations in the current file after that to be
1438/// considered to be from a system header.
1441 assert(Loc.isValid() && "Can't get file characteristic of invalid loc!");
1442 std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
1443 const SLocEntry *SEntry = getSLocEntryForFile(LocInfo.first);
1444 if (!SEntry)
1445 return C_User;
1446
1447 const SrcMgr::FileInfo &FI = SEntry->getFile();
1448
1449 // If there are no #line directives in this file, just return the whole-file
1450 // state.
1451 if (!FI.hasLineDirectives())
1452 return FI.getFileCharacteristic();
1453
1454 assert(LineTable && "Can't have linetable entries without a LineTable!");
1455 // See if there is a #line directive before the location.
1456 const LineEntry *Entry =
1457 LineTable->FindNearestLineEntry(LocInfo.first, LocInfo.second);
1458
1459 // If this is before the first line marker, use the file characteristic.
1460 if (!Entry)
1461 return FI.getFileCharacteristic();
1462
1463 return Entry->FileKind;
1464}
1465
1466/// Return the filename or buffer identifier of the buffer the location is in.
1467/// Note that this name does not respect \#line directives. Use getPresumedLoc
1468/// for normal clients.
1470 bool *Invalid) const {
1471 if (isInvalid(Loc, Invalid)) return "<invalid loc>";
1472
1473 auto B = getBufferOrNone(getFileID(Loc));
1474 if (Invalid)
1475 *Invalid = !B;
1476 return B ? B->getBufferIdentifier() : "<invalid buffer>";
1477}
1478
1479/// getPresumedLoc - This method returns the "presumed" location that a
1480/// SourceLocation specifies. A "presumed location" can be modified by \#line
1481/// or GNU line marker directives. This provides a view on the data that a
1482/// user should see in diagnostics, for example.
1483///
1484/// Note that a presumed location is always given as the expansion point of an
1485/// expansion location, not at the spelling location.
1487 bool UseLineDirectives) const {
1488 if (Loc.isInvalid()) return PresumedLoc();
1489
1490 // Presumed locations are always for expansion points.
1491 std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
1492
1493 bool Invalid = false;
1494 const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid);
1495 if (Invalid || !Entry.isFile())
1496 return PresumedLoc();
1497
1498 const SrcMgr::FileInfo &FI = Entry.getFile();
1499 const SrcMgr::ContentCache *C = &FI.getContentCache();
1500
1501 // To get the source name, first consult the FileEntry (if one exists)
1502 // before the MemBuffer as this will avoid unnecessarily paging in the
1503 // MemBuffer.
1504 FileID FID = LocInfo.first;
1505 StringRef Filename;
1506 if (C->OrigEntry)
1507 Filename = C->OrigEntry->getName();
1508 else if (auto Buffer = C->getBufferOrNone(Diag, getFileManager()))
1509 Filename = Buffer->getBufferIdentifier();
1510
1511 unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second, &Invalid);
1512 if (Invalid)
1513 return PresumedLoc();
1514 unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid);
1515 if (Invalid)
1516 return PresumedLoc();
1517
1518 SourceLocation IncludeLoc = FI.getIncludeLoc();
1519
1520 // If we have #line directives in this file, update and overwrite the physical
1521 // location info if appropriate.
1522 if (UseLineDirectives && FI.hasLineDirectives()) {
1523 assert(LineTable && "Can't have linetable entries without a LineTable!");
1524 // See if there is a #line directive before this. If so, get it.
1525 if (const LineEntry *Entry =
1526 LineTable->FindNearestLineEntry(LocInfo.first, LocInfo.second)) {
1527 // If the LineEntry indicates a filename, use it.
1528 if (Entry->FilenameID != -1) {
1529 Filename = LineTable->getFilename(Entry->FilenameID);
1530 // The contents of files referenced by #line are not in the
1531 // SourceManager
1532 FID = FileID::get(0);
1533 }
1534
1535 // Use the line number specified by the LineEntry. This line number may
1536 // be multiple lines down from the line entry. Add the difference in
1537 // physical line numbers from the query point and the line marker to the
1538 // total.
1539 unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
1540 LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
1541
1542 // Note that column numbers are not molested by line markers.
1543
1544 // Handle virtual #include manipulation.
1545 if (Entry->IncludeOffset) {
1546 IncludeLoc = getLocForStartOfFile(LocInfo.first);
1547 IncludeLoc = IncludeLoc.getLocWithOffset(Entry->IncludeOffset);
1548 }
1549 }
1550 }
1551
1552 return PresumedLoc(Filename.data(), FID, LineNo, ColNo, IncludeLoc);
1553}
1554
1555/// Returns whether the PresumedLoc for a given SourceLocation is
1556/// in the main file.
1557///
1558/// This computes the "presumed" location for a SourceLocation, then checks
1559/// whether it came from a file other than the main file. This is different
1560/// from isWrittenInMainFile() because it takes line marker directives into
1561/// account.
1563 if (Loc.isInvalid()) return false;
1564
1565 // Presumed locations are always for expansion points.
1566 std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
1567
1568 const SLocEntry *Entry = getSLocEntryForFile(LocInfo.first);
1569 if (!Entry)
1570 return false;
1571
1572 const SrcMgr::FileInfo &FI = Entry->getFile();
1573
1574 // Check if there is a line directive for this location.
1575 if (FI.hasLineDirectives())
1576 if (const LineEntry *Entry =
1577 LineTable->FindNearestLineEntry(LocInfo.first, LocInfo.second))
1578 if (Entry->IncludeOffset)
1579 return false;
1580
1581 return FI.getIncludeLoc().isInvalid();
1582}
1583
1584/// The size of the SLocEntry that \p FID represents.
1586 bool Invalid = false;
1587 const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid);
1588 if (Invalid)
1589 return 0;
1590
1591 int ID = FID.ID;
1592 SourceLocation::UIntTy NextOffset;
1593 if ((ID > 0 && unsigned(ID+1) == local_sloc_entry_size()))
1594 NextOffset = getNextLocalOffset();
1595 else if (ID+1 == -1)
1596 NextOffset = MaxLoadedOffset;
1597 else
1598 NextOffset = getSLocEntry(FileID::get(ID+1)).getOffset();
1599
1600 return NextOffset - Entry.getOffset() - 1;
1601}
1602
1603//===----------------------------------------------------------------------===//
1604// Other miscellaneous methods.
1605//===----------------------------------------------------------------------===//
1606
1607/// Get the source location for the given file:line:col triplet.
1608///
1609/// If the source file is included multiple times, the source location will
1610/// be based upon an arbitrary inclusion.
1612 unsigned Line,
1613 unsigned Col) const {
1614 assert(SourceFile && "Null source file!");
1615 assert(Line && Col && "Line and column should start from 1!");
1616
1617 FileID FirstFID = translateFile(SourceFile);
1618 return translateLineCol(FirstFID, Line, Col);
1619}
1620
1621/// Get the FileID for the given file.
1622///
1623/// If the source file is included multiple times, the FileID will be the
1624/// first inclusion.
1626 assert(SourceFile && "Null source file!");
1627
1628 // First, check the main file ID, since it is common to look for a
1629 // location in the main file.
1630 if (MainFileID.isValid()) {
1631 bool Invalid = false;
1632 const SLocEntry &MainSLoc = getSLocEntry(MainFileID, &Invalid);
1633 if (Invalid)
1634 return FileID();
1635
1636 if (MainSLoc.isFile()) {
1637 if (MainSLoc.getFile().getContentCache().OrigEntry == SourceFile)
1638 return MainFileID;
1639 }
1640 }
1641
1642 // The location we're looking for isn't in the main file; look
1643 // through all of the local source locations.
1644 for (unsigned I = 0, N = local_sloc_entry_size(); I != N; ++I) {
1645 const SLocEntry &SLoc = getLocalSLocEntry(I);
1646 if (SLoc.isFile() &&
1647 SLoc.getFile().getContentCache().OrigEntry == SourceFile)
1648 return FileID::get(I);
1649 }
1650
1651 // If that still didn't help, try the modules.
1652 for (unsigned I = 0, N = loaded_sloc_entry_size(); I != N; ++I) {
1653 const SLocEntry &SLoc = getLoadedSLocEntry(I);
1654 if (SLoc.isFile() &&
1655 SLoc.getFile().getContentCache().OrigEntry == SourceFile)
1656 return FileID::get(-int(I) - 2);
1657 }
1658
1659 return FileID();
1660}
1661
1662/// Get the source location in \arg FID for the given line:col.
1663/// Returns null location if \arg FID is not a file SLocEntry.
1665 unsigned Line,
1666 unsigned Col) const {
1667 // Lines are used as a one-based index into a zero-based array. This assert
1668 // checks for possible buffer underruns.
1669 assert(Line && Col && "Line and column should start from 1!");
1670
1671 if (FID.isInvalid())
1672 return SourceLocation();
1673
1674 bool Invalid = false;
1675 const SLocEntry &Entry = getSLocEntry(FID, &Invalid);
1676 if (Invalid)
1677 return SourceLocation();
1678
1679 if (!Entry.isFile())
1680 return SourceLocation();
1681
1682 SourceLocation FileLoc = SourceLocation::getFileLoc(Entry.getOffset());
1683
1684 if (Line == 1 && Col == 1)
1685 return FileLoc;
1686
1687 const ContentCache *Content = &Entry.getFile().getContentCache();
1688
1689 // If this is the first use of line information for this buffer, compute the
1690 // SourceLineCache for it on demand.
1691 std::optional<llvm::MemoryBufferRef> Buffer =
1692 Content->getBufferOrNone(Diag, getFileManager());
1693 if (!Buffer)
1694 return SourceLocation();
1695 if (!Content->SourceLineCache)
1696 Content->SourceLineCache =
1697 LineOffsetMapping::get(*Buffer, ContentCacheAlloc);
1698
1699 if (Line > Content->SourceLineCache.size()) {
1700 unsigned Size = Buffer->getBufferSize();
1701 if (Size > 0)
1702 --Size;
1703 return FileLoc.getLocWithOffset(Size);
1704 }
1705
1706 unsigned FilePos = Content->SourceLineCache[Line - 1];
1707 const char *Buf = Buffer->getBufferStart() + FilePos;
1708 unsigned BufLength = Buffer->getBufferSize() - FilePos;
1709 if (BufLength == 0)
1710 return FileLoc.getLocWithOffset(FilePos);
1711
1712 unsigned i = 0;
1713
1714 // Check that the given column is valid.
1715 while (i < BufLength-1 && i < Col-1 && Buf[i] != '\n' && Buf[i] != '\r')
1716 ++i;
1717 return FileLoc.getLocWithOffset(FilePos + i);
1718}
1719
1720/// Compute a map of macro argument chunks to their expanded source
1721/// location. Chunks that are not part of a macro argument will map to an
1722/// invalid source location. e.g. if a file contains one macro argument at
1723/// offset 100 with length 10, this is how the map will be formed:
1724/// 0 -> SourceLocation()
1725/// 100 -> Expanded macro arg location
1726/// 110 -> SourceLocation()
///
/// \param MacroArgsCache  the map to fill; an entry for offset 0 is inserted
///                        first.
/// \param FID             the file whose macro-argument chunks are collected;
///                        must be valid.
///
/// The entries with IDs following \p FID are visited in increasing ID order.
/// The walk ends when the IDs run out, an entry cannot be obtained, or an
/// entry is found that cannot be contained in \p FID.
1727void SourceManager::computeMacroArgsCache(MacroArgsMap &MacroArgsCache,
1728 FileID FID) const {
1729 assert(FID.isValid());
1730
1731 // Initially no macro argument chunk is present.
1732 MacroArgsCache.insert(std::make_pair(0, SourceLocation()));
1733
1734 int ID = FID.ID;
1735 while (true) {
1736 ++ID;
1737 // Stop if there are no more FileIDs to check.
// Positive IDs are bounded by the number of local entries. For negative IDs
// the walk stops at -1 (presumably the end of the loaded range - TODO confirm).
1738 if (ID > 0) {
1739 if (unsigned(ID) >= local_sloc_entry_size())
1740 return;
1741 } else if (ID == -1) {
1742 return;
1743 }
1744
1745 bool Invalid = false;
1746 const SrcMgr::SLocEntry &Entry = getSLocEntryByID(ID, &Invalid);
1747 if (Invalid)
1748 return;
1749 if (Entry.isFile()) {
1750 auto& File = Entry.getFile();
// Module map files are passed over without affecting the walk.
1751 if (File.getFileCharacteristic() == C_User_ModuleMap ||
1752 File.getFileCharacteristic() == C_System_ModuleMap)
1753 continue;
1754
1755 SourceLocation IncludeLoc = File.getIncludeLoc();
1756 bool IncludedInFID =
1757 (IncludeLoc.isValid() && isInFileID(IncludeLoc, FID)) ||
1758 // Predefined header doesn't have a valid include location in main
1759 // file, but any files created by it should still be skipped when
1760 // computing macro args expanded in the main file.
1761 (FID == MainFileID && Entry.getFile().getName() == "<built-in>");
1762 if (IncludedInFID) {
1763 // Skip the files/macros of the #include'd file, we only care about
1764 // macros that lexed macro arguments from our file.
1765 if (Entry.getFile().NumCreatedFIDs)
1766 ID += Entry.getFile().NumCreatedFIDs - 1 /*because of next ++ID*/;
1767 continue;
1768 }
1769 // If file was included but not from FID, there are no more files/macros
1770 // that may be "contained" in this file.
1771 if (IncludeLoc.isValid())
1772 return;
// A file entry without a valid include location does not end the walk.
1773 continue;
1774 }
1775
// From here on the entry is an expansion rather than a file.
1776 const ExpansionInfo &ExpInfo = Entry.getExpansion();
1777
1778 if (ExpInfo.getExpansionLocStart().isFileID()) {
1779 if (!isInFileID(ExpInfo.getExpansionLocStart(), FID))
1780 return; // No more files/macros that may be "contained" in this file.
1781 }
1782
// Only macro argument expansions contribute chunks to the map.
1783 if (!ExpInfo.isMacroArgExpansion())
1784 continue;
1785
// Record the chunk: spelling location of the argument, the macro location at
// this entry's offset, and the entry's size as the chunk length.
1786 associateFileChunkWithMacroArgExp(MacroArgsCache, FID,
1787 ExpInfo.getSpellingLoc(),
1788 SourceLocation::getMacroLoc(Entry.getOffset()),
1789 getFileIDSize(FileID::get(ID)));
1790 }
1791}
1792
/// Records in \p MacroArgsCache that the text spelled at \p SpellLoc, of length
/// \p ExpansionLength, was expanded as a macro argument at \p ExpansionLoc.
///
/// \param MacroArgsCache Map from a begin offset inside \p FID to the location
///        the chunk starting at that offset was expanded to; updated in place.
/// \param FID File whose chunks are being tracked. A file spelling location
///        that is not inside \p FID is ignored.
/// \param SpellLoc Where the expanded text was spelled. If it is itself a
///        macro location, the spelling range is walked entry by entry and the
///        function recurses for every macro-argument expansion found.
/// \param ExpansionLoc Macro location matching the start of the chunk.
/// \param ExpansionLength Length of the chunk.
1793void SourceManager::associateFileChunkWithMacroArgExp(
1794 MacroArgsMap &MacroArgsCache,
1795 FileID FID,
1796 SourceLocation SpellLoc,
1797 SourceLocation ExpansionLoc,
1798 unsigned ExpansionLength) const {
1799 if (!SpellLoc.isFileID()) {
1800 SourceLocation::UIntTy SpellBeginOffs = SpellLoc.getOffset();
1801 SourceLocation::UIntTy SpellEndOffs = SpellBeginOffs + ExpansionLength;
1802
1803 // The spelling range for this macro argument expansion can span multiple
1804 // consecutive FileID entries. Go through each entry contained in the
1805 // spelling range and if one is itself a macro argument expansion, recurse
1806 // and associate the file chunk that it represents.
1807
1808 FileID SpellFID; // Current FileID in the spelling range.
1809 unsigned SpellRelativeOffs;
1810 std::tie(SpellFID, SpellRelativeOffs) = getDecomposedLoc(SpellLoc);
1811 while (true) {
1812 const SLocEntry &Entry = getSLocEntry(SpellFID);
1813 SourceLocation::UIntTy SpellFIDBeginOffs = Entry.getOffset();
1814 unsigned SpellFIDSize = getFileIDSize(SpellFID);
1815 SourceLocation::UIntTy SpellFIDEndOffs = SpellFIDBeginOffs + SpellFIDSize;
1816 const ExpansionInfo &Info = Entry.getExpansion();
1817 if (Info.isMacroArgExpansion()) {
     // If this entry ends before the spelling range does, only the part of
     // the entry from SpellRelativeOffs to its end belongs to the range;
     // otherwise the whole remaining length lies inside this entry.
1818 unsigned CurrSpellLength;
1819 if (SpellFIDEndOffs < SpellEndOffs)
1820 CurrSpellLength = SpellFIDSize - SpellRelativeOffs;
1821 else
1822 CurrSpellLength = ExpansionLength;
1823 associateFileChunkWithMacroArgExp(MacroArgsCache, FID,
1824 Info.getSpellingLoc().getLocWithOffset(SpellRelativeOffs),
1825 ExpansionLoc, CurrSpellLength);
1826 }
1827
1828 if (SpellFIDEndOffs >= SpellEndOffs)
1829 return; // we covered all FileID entries in the spelling range.
1830
1831 // Move to the next FileID entry in the spelling range.
     // The expansion location and the remaining length are advanced by the
     // same amount so they stay in step with the spelling position.
     // NOTE(review): the +1 presumably skips the extra one-past-the-end byte
     // that follows each entry -- confirm against getFileIDSize.
1832 unsigned advance = SpellFIDSize - SpellRelativeOffs + 1;
1833 ExpansionLoc = ExpansionLoc.getLocWithOffset(advance);
1834 ExpansionLength -= advance;
1835 ++SpellFID.ID;
1836 SpellRelativeOffs = 0;
1837 }
1838 }
1839
1840 assert(SpellLoc.isFileID());
1841
     // Only chunks spelled inside FID are recorded; BeginOffs receives the
     // offset of SpellLoc relative to FID.
1842 unsigned BeginOffs;
1843 if (!isInFileID(SpellLoc, FID, &BeginOffs))
1844 return;
1845
1846 unsigned EndOffs = BeginOffs + ExpansionLength;
1847
1848 // Add a new chunk for this macro argument. A previous macro argument chunk
1849 // may have been lexed again, so e.g. if the map is
1850 // 0 -> SourceLocation()
1851 // 100 -> Expanded loc #1
1852 // 110 -> SourceLocation()
1853 // and we found a new macro FileID that lexed from offset 105 with length 3,
1854 // the new map will be:
1855 // 0 -> SourceLocation()
1856 // 100 -> Expanded loc #1
1857 // 105 -> Expanded loc #2
1858 // 108 -> Expanded loc #1
1859 // 110 -> SourceLocation()
1860 //
1861 // Since re-lexed macro chunks will always be the same size as or smaller than
1862 // previous chunks, we only need to find where the ending of the new macro
1863 // chunk is mapped to and update the map with new begin/end mappings.
1864
     // upper_bound yields the first key greater than EndOffs; stepping back
     // gives the entry that currently covers EndOffs.
     // NOTE(review): this assumes the map already holds a key <= EndOffs (such
     // as the 0 -> SourceLocation() entry in the example above); otherwise the
     // decrement would step before begin(). Confirm where the map is seeded.
1865 MacroArgsMap::iterator I = MacroArgsCache.upper_bound(EndOffs);
1866 --I;
1867 SourceLocation EndOffsMappedLoc = I->second;
1868 MacroArgsCache[BeginOffs] = ExpansionLoc;
1869 MacroArgsCache[EndOffs] = EndOffsMappedLoc;
1870}
1871
1872void SourceManager::updateSlocUsageStats() const {
1873 SourceLocation::UIntTy UsedBytes =
1874 NextLocalOffset + (MaxLoadedOffset - CurrentLoadedOffset);
1875 MaxUsedSLocBytes.updateMax(UsedBytes);
1876}
1877
1878/// If \arg Loc points inside a function macro argument, the returned
1879/// location will be the macro location in which the argument was expanded.
1880/// If a macro argument is used multiple times, the expanded location will
1881/// be at the first expansion of the argument.
1882/// e.g.
1883/// MY_MACRO(foo);
1884/// ^
1885/// Passing a file location pointing at 'foo', will yield a macro location
1886/// where 'foo' was expanded into.
///
/// In every other case (invalid location, macro location, invalid FileID, or
/// an offset that is not covered by a macro-argument chunk) \arg Loc is
/// returned unchanged.
1889 if (Loc.isInvalid() || !Loc.isFileID())
1890 return Loc;
1891
1892 FileID FID;
1893 unsigned Offset;
1894 std::tie(FID, Offset) = getDecomposedLoc(Loc);
1895 if (FID.isInvalid())
1896 return Loc;
1897
     // The per-file map is built on the first query for FID and reused after.
1898 std::unique_ptr<MacroArgsMap> &MacroArgsCache = MacroArgsCacheMap[FID];
1899 if (!MacroArgsCache) {
1900 MacroArgsCache = std::make_unique<MacroArgsMap>();
1901 computeMacroArgsCache(*MacroArgsCache, FID);
1902 }
1903
1904 assert(!MacroArgsCache->empty());
1905 MacroArgsMap::iterator I = MacroArgsCache->upper_bound(Offset);
1906 // In case every element in MacroArgsCache is greater than Offset we can't
1907 // decrement the iterator.
1908 if (I == MacroArgsCache->begin())
1909 return Loc;
1910
1911 --I;
1912
     // I is now the last chunk whose begin offset is <= Offset. A valid mapped
     // location means Offset lies in a chunk lexed as a macro argument, so the
     // result is that location shifted by the distance from the chunk start.
1913 SourceLocation::UIntTy MacroArgBeginOffs = I->first;
1914 SourceLocation MacroArgExpandedLoc = I->second;
1915 if (MacroArgExpandedLoc.isValid())
1916 return MacroArgExpandedLoc.getLocWithOffset(Offset - MacroArgBeginOffs);
1917
1918 return Loc;
1919}
1920
/// Returns the decomposed (FileID, offset) location one level up from \c FID:
/// the start of the expansion for an expansion entry, or the include location
/// for a file entry. Returns (FileID(), 0) when \c FID is invalid, and the
/// default-constructed pair when there is no valid upper location. Results are
/// cached in IncludedLocMap.
1921std::pair<FileID, unsigned>
1923 if (FID.isInvalid())
1924 return std::make_pair(FileID(), 0);
1925
1926 // Uses IncludedLocMap to retrieve/cache the decomposed loc.
1927
1928 using DecompTy = std::pair<FileID, unsigned>;
     // try_emplace creates a default entry when FID is not cached yet; DecompLoc
     // refers to the map slot, so assigning to it below fills the cache.
1929 auto InsertOp = IncludedLocMap.try_emplace(FID);
1930 DecompTy &DecompLoc = InsertOp.first->second;
1931 if (!InsertOp.second)
1932 return DecompLoc; // already in map.
1933
1934 SourceLocation UpperLoc;
1935 bool Invalid = false;
1936 const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid);
1937 if (!Invalid) {
1938 if (Entry.isExpansion())
1939 UpperLoc = Entry.getExpansion().getExpansionLocStart();
1940 else
1941 UpperLoc = Entry.getFile().getIncludeLoc();
1942 }
1943
     // With no valid upper location the cached slot keeps its default value.
1944 if (UpperLoc.isValid())
1945 DecompLoc = getDecomposedLoc(UpperLoc);
1946
1947 return DecompLoc;
1948}
1949
1951 assert(isLoadedSourceLocation(Loc) &&
1952 "Must be a source location in a loaded PCH/Module file");
1953
1954 auto [FID, Ignore] = getDecomposedLoc(Loc);
1955 // `LoadedSLocEntryAllocBegin` stores the sorted lowest FID of each loaded
1956 // allocation. Later allocations have lower FileIDs. The call below is to find
1957 // the lowest FID of a loaded allocation from any FID in the same allocation.
1958 // The lowest FID is used to identify a loaded allocation.
1959 const FileID *FirstFID =
1960 llvm::lower_bound(LoadedSLocEntryAllocBegin, FID, std::greater<FileID>{});
1961
1962 assert(FirstFID &&
1963 "The failure to find the first FileID of a "
1964 "loaded AST from a loaded source location was unexpected.");
1965 return *FirstFID;
1966}
1967
1969 const std::pair<FileID, unsigned> &LOffs,
1970 const std::pair<FileID, unsigned> &ROffs) const {
     // Returns true when both decomposed locations are local, or when both are
     // loaded and fall into the same loaded allocation; false otherwise.
1971 // If one is local while the other is loaded.
1972 if (isLoadedFileID(LOffs.first) != isLoadedFileID(ROffs.first))
1973 return false;
1974
     // After the check above the two agree, so this is the "both loaded" case.
1975 if (isLoadedFileID(LOffs.first) && isLoadedFileID(ROffs.first)) {
1976 auto FindSLocEntryAlloc = [this](FileID FID) {
1977 // Loaded FileIDs are negative, we store the lowest FileID from each
1978 // allocation, later allocations have lower FileIDs.
1979 return llvm::lower_bound(LoadedSLocEntryAllocBegin, FID,
1980 std::greater<FileID>{});
1981 };
1982
1983 // If both are loaded from different AST files.
1984 if (FindSLocEntryAlloc(LOffs.first) != FindSLocEntryAlloc(ROffs.first))
1985 return false;
1986 }
1987
1988 return true;
1989}
1990
1991/// Given a decomposed source location, move it up the include/expansion stack
1992/// to the parent source location within the same translation unit. If this is
1993/// possible, return the decomposed version of the parent in Loc and return
1994/// false. If Loc is a top-level entry, return true and don't modify it.
1995static bool
1996MoveUpTranslationUnitIncludeHierarchy(std::pair<FileID, unsigned> &Loc,
1997 const SourceManager &SM) {
1998 std::pair<FileID, unsigned> UpperLoc = SM.getDecomposedIncludedLoc(Loc.first);
1999 if (UpperLoc.first.isInvalid() ||
2000 !SM.isInTheSameTranslationUnitImpl(UpperLoc, Loc))
2001 return true; // We reached the top.
2002
2003 Loc = UpperLoc;
2004 return false;
2005}
2006
2007/// Return the cache entry for comparing the given file IDs
2008/// for isBeforeInTranslationUnit.
2009InBeforeInTUCacheEntry &SourceManager::getInBeforeInTUCache(FileID LFID,
2010 FileID RFID) const {
2011 // This is a magic number for limiting the cache size. It was experimentally
2012 // derived from a small Objective-C project (where the cache filled
2013 // out to ~250 items). We can make it larger if necessary.
2014 // FIXME: this is almost certainly full these days. Use an LRU cache?
2015 enum { MagicCacheSize = 300 };
2016 IsBeforeInTUCacheKey Key(LFID, RFID);
2017
2018 // If the cache size isn't too large, do a lookup and if necessary default
2019 // construct an entry. We can then return it to the caller for direct
2020 // use. When they update the value, the cache will get automatically
2021 // updated as well.
2022 if (IBTUCache.size() < MagicCacheSize)
2023 return IBTUCache.try_emplace(Key, LFID, RFID).first->second;
2024
2025 // Otherwise, do a lookup that will not construct a new value.
2026 InBeforeInTUCache::iterator I = IBTUCache.find(Key);
2027 if (I != IBTUCache.end())
2028 return I->second;
2029
2030 // Fall back to the overflow value.
2031 IBTUCacheOverflow.setQueryFIDs(LFID, RFID);
2032 return IBTUCacheOverflow;
2033}
2034
2035/// Determines the order of 2 source locations in the translation unit.
2036///
/// Both locations must be valid (asserted).
///
2037/// \returns true if LHS source location comes before RHS, false otherwise.
2039 SourceLocation RHS) const {
2040 assert(LHS.isValid() && RHS.isValid() && "Passed invalid source location!");
     // A location is never before itself.
2041 if (LHS == RHS)
2042 return false;
2043
2044 std::pair<FileID, unsigned> LOffs = getDecomposedLoc(LHS);
2045 std::pair<FileID, unsigned> ROffs = getDecomposedLoc(RHS);
2046
2047 // getDecomposedLoc may have failed to return a valid FileID because, e.g. it
2048 // is a serialized one referring to a file that was removed after we loaded
2049 // the PCH.
     // An invalid FileID orders before a valid one; every other combination
     // reaching this return yields false.
2050 if (LOffs.first.isInvalid() || ROffs.first.isInvalid())
2051 return LOffs.first.isInvalid() && !ROffs.first.isInvalid();
2052
     // InSameTU.first says whether the locations are in the same TU; only then
     // is InSameTU.second the answer.
2053 std::pair<bool, bool> InSameTU = isInTheSameTranslationUnit(LOffs, ROffs);
2054 if (InSameTU.first)
2055 return InSameTU.second;
2056 // This case is used by libclang: clang_isBeforeInTranslationUnit
     // Not in the same TU: fall back to comparing the FileIDs themselves.
2057 return LOffs.first < ROffs.first;
2058}
2059
2061 std::pair<FileID, unsigned> &LOffs,
2062 std::pair<FileID, unsigned> &ROffs) const {
     // Returns a pair: .first is whether the two decomposed locations are in
     // the same translation unit, .second is whether LOffs comes before ROffs
     // (only meaningful when .first is true).
     // LOffs and ROffs are taken by non-const reference and are overwritten
     // while walking up the include/expansion chains below.
2063 // If the source locations are not in the same TU, return early.
2064 if (!isInTheSameTranslationUnitImpl(LOffs, ROffs))
2065 return std::make_pair(false, false);
2066
2067 // If the source locations are in the same file, just compare offsets.
2068 if (LOffs.first == ROffs.first)
2069 return std::make_pair(true, LOffs.second < ROffs.second);
2070
2071 // If we are comparing a source location with multiple locations in the same
2072 // file, we get a big win by caching the result.
2073 InBeforeInTUCacheEntry &IsBeforeInTUCache =
2074 getInBeforeInTUCache(LOffs.first, ROffs.first);
2075
2076 // If the cache entry already holds an answer for this pair of FileIDs,
2077 // compute the result directly from it.
2078 if (IsBeforeInTUCache.isCacheValid())
2079 return std::make_pair(
2080 true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
2081
2082 // Okay, we missed in the cache, we'll compute the answer and populate it.
2083 // We need to find the common ancestor. The only way of doing this is to
2084 // build the complete include chain for one and then walking up the chain
2085 // of the other looking for a match.
2086
2087 // A location within a FileID on the path up from LOffs to the main file.
2088 struct Entry {
2089 std::pair<FileID, unsigned> DecomposedLoc; // FileID redundant, but clearer.
2090 FileID ChildFID; // Used for breaking ties. Invalid for the initial loc.
2091 };
2092 llvm::SmallDenseMap<FileID, Entry, 16> LChain;
2093
     // Record every FileID on the way up from LOffs, keyed by FileID, together
     // with the child FileID we arrived from.
2094 FileID LChild;
2095 do {
2096 LChain.try_emplace(LOffs.first, Entry{LOffs, LChild});
2097 // We catch the case where LOffs is in a file included by ROffs and
2098 // quit early. The other way round unfortunately remains suboptimal.
2099 if (LOffs.first == ROffs.first)
2100 break;
2101 LChild = LOffs.first;
2102 } while (!MoveUpTranslationUnitIncludeHierarchy(LOffs, *this));
2103
     // Walk up from ROffs until a FileID recorded in LChain is reached; that
     // FileID is the common ancestor.
2104 FileID RChild;
2105 do {
2106 auto LIt = LChain.find(ROffs.first);
2107 if (LIt != LChain.end()) {
2108 // Compare the locations within the common file and cache them.
2109 LOffs = LIt->second.DecomposedLoc;
2110 LChild = LIt->second.ChildFID;
2111 // The relative order of LChild and RChild is a tiebreaker when
2112 // - locs expand to the same location (occurs in macro arg expansion)
2113 // - one loc is a parent of the other (we consider the parent as "first")
2114 // For the parent entry to be first, its invalid child file ID must
2115 // compare smaller to the valid child file ID of the other entry.
2116 // However loaded FileIDs are <0, so we perform *unsigned* comparison!
2117 // This changes the relative order of local vs loaded FileIDs, but it
2118 // doesn't matter as these are never mixed in macro expansion.
2119 unsigned LChildID = LChild.ID;
2120 unsigned RChildID = RChild.ID;
2121 assert(((LOffs.second != ROffs.second) ||
2122 (LChildID == 0 || RChildID == 0) ||
2123 isInSameSLocAddrSpace(getComposedLoc(LChild, 0),
2124 getComposedLoc(RChild, 0), nullptr)) &&
2125 "Mixed local/loaded FileIDs with same include location?");
2126 IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second,
2127 LChildID < RChildID);
2128 return std::make_pair(
2129 true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
2130 }
2131 RChild = ROffs.first;
2132 } while (!MoveUpTranslationUnitIncludeHierarchy(ROffs, *this));
2133
2134 // If we found no match, the location is either in a built-ins buffer or
2135 // associated with global inline asm. PR5662 and PR22576 are examples.
2136
     // From here on the order is decided by the buffer identifiers of the two
     // top-level FileIDs reached above.
2137 StringRef LB = getBufferOrFake(LOffs.first).getBufferIdentifier();
2138 StringRef RB = getBufferOrFake(ROffs.first).getBufferIdentifier();
2139
2140 bool LIsBuiltins = LB == "<built-in>";
2141 bool RIsBuiltins = RB == "<built-in>";
2142 // Sort built-in before non-built-in.
2143 if (LIsBuiltins || RIsBuiltins) {
2144 if (LIsBuiltins != RIsBuiltins)
2145 return std::make_pair(true, LIsBuiltins);
2146 // Both are in built-in buffers, but from different files. We just claim
2147 // that lower IDs come first.
2148 return std::make_pair(true, LOffs.first < ROffs.first);
2149 }
2150
2151 bool LIsAsm = LB == "<inline asm>";
2152 bool RIsAsm = RB == "<inline asm>";
2153 // Sort assembler after built-ins, but before the rest.
     // NOTE(review): when exactly one side is inline asm this returns RIsAsm,
     // i.e. the inline-asm location is reported as coming *after* the other
     // one, which looks at odds with "before the rest" above -- confirm which
     // of the comment or the code reflects the intent.
2154 if (LIsAsm || RIsAsm) {
2155 if (LIsAsm != RIsAsm)
2156 return std::make_pair(true, RIsAsm);
2157 assert(LOffs.first == ROffs.first);
2158 return std::make_pair(true, false);
2159 }
2160
2161 bool LIsScratch = LB == "<scratch space>";
2162 bool RIsScratch = RB == "<scratch space>";
2163 // Sort scratch after inline asm, but before the rest.
2164 if (LIsScratch || RIsScratch) {
2165 if (LIsScratch != RIsScratch)
2166 return std::make_pair(true, LIsScratch);
     // Both in scratch space: order by offset.
2167 return std::make_pair(true, LOffs.second < ROffs.second);
2168 }
2169
2170 llvm_unreachable("Unsortable locations found");
2171}
2172
     // Prints source manager statistics to llvm::errs(): number of files and
     // memory buffers mapped, local and loaded SLocEntry counts with their
     // table capacities and address-space usage, bytes of files mapped, and
     // the FileID scan counters.
2174 llvm::errs() << "\n*** Source Manager Stats:\n";
2175 llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
2176 << " mem buffers mapped.\n";
2177 llvm::errs() << LocalSLocEntryTable.size() << " local SLocEntries allocated ("
2178 << llvm::capacity_in_bytes(LocalSLocEntryTable)
2179 << " bytes of capacity), " << NextLocalOffset
2180 << "B of SLoc address space used.\n";
2181 llvm::errs() << LoadedSLocEntryTable.size()
2182 << " loaded SLocEntries allocated ("
2183 << llvm::capacity_in_bytes(LoadedSLocEntryTable)
2184 << " bytes of capacity), "
2185 << MaxLoadedOffset - CurrentLoadedOffset
2186 << "B of SLoc address space used.\n";
2187
     // Count files that have a line cache and sum the bytes each file maps.
     // NOTE(review): both totals are accumulated in `unsigned`; the byte total
     // could wrap for very large inputs -- confirm whether that matters here.
2188 unsigned NumLineNumsComputed = 0;
2189 unsigned NumFileBytesMapped = 0;
2190 for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
2191 NumLineNumsComputed += bool(I->second->SourceLineCache);
2192 NumFileBytesMapped += I->second->getSizeBytesMapped();
2193 }
     // One MacroArgsCacheMap entry exists per file whose macro args were
     // computed.
2194 unsigned NumMacroArgsComputed = MacroArgsCacheMap.size();
2195
2196 llvm::errs() << NumFileBytesMapped << " bytes of files mapped, "
2197 << NumLineNumsComputed << " files with line #'s computed, "
2198 << NumMacroArgsComputed << " files with macro args computed.\n";
2199 llvm::errs() << "FileID scans: " << NumLinearScans << " linear, "
2200 << NumBinaryProbes << " binary.\n";
2201}
2202
2203LLVM_DUMP_METHOD void SourceManager::dump() const {
2204 llvm::raw_ostream &out = llvm::errs();
2205
2206 auto DumpSLocEntry = [&](int ID, const SrcMgr::SLocEntry &Entry,
2207 std::optional<SourceLocation::UIntTy> NextStart) {
2208 out << "SLocEntry <FileID " << ID << "> " << (Entry.isFile() ? "file" : "expansion")
2209 << " <SourceLocation " << Entry.getOffset() << ":";
2210 if (NextStart)
2211 out << *NextStart << ">\n";
2212 else
2213 out << "???\?>\n";
2214 if (Entry.isFile()) {
2215 auto &FI = Entry.getFile();
2216 if (FI.NumCreatedFIDs)
2217 out << " covers <FileID " << ID << ":" << int(ID + FI.NumCreatedFIDs)
2218 << ">\n";
2219 if (FI.getIncludeLoc().isValid())
2220 out << " included from " << FI.getIncludeLoc().getOffset() << "\n";
2221 auto &CC = FI.getContentCache();
2222 out << " for " << (CC.OrigEntry ? CC.OrigEntry->getName() : "<none>")
2223 << "\n";
2224 if (CC.BufferOverridden)
2225 out << " contents overridden\n";
2226 if (CC.ContentsEntry != CC.OrigEntry) {
2227 out << " contents from "
2228 << (CC.ContentsEntry ? CC.ContentsEntry->getName() : "<none>")
2229 << "\n";
2230 }
2231 } else {
2232 auto &EI = Entry.getExpansion();
2233 out << " spelling from " << EI.getSpellingLoc().getOffset() << "\n";
2234 out << " macro " << (EI.isMacroArgExpansion() ? "arg" : "body")
2235 << " range <" << EI.getExpansionLocStart().getOffset() << ":"
2236 << EI.getExpansionLocEnd().getOffset() << ">\n";
2237 }
2238 };
2239
2240 // Dump local SLocEntries.
2241 for (unsigned ID = 0, NumIDs = LocalSLocEntryTable.size(); ID != NumIDs; ++ID) {
2242 DumpSLocEntry(ID, LocalSLocEntryTable[ID],
2243 ID == NumIDs - 1 ? NextLocalOffset
2244 : LocalSLocEntryTable[ID + 1].getOffset());
2245 }
2246 // Dump loaded SLocEntries.
2247 std::optional<SourceLocation::UIntTy> NextStart;
2248 for (unsigned Index = 0; Index != LoadedSLocEntryTable.size(); ++Index) {
2249 int ID = -(int)Index - 2;
2250 if (SLocEntryLoaded[Index]) {
2251 DumpSLocEntry(ID, LoadedSLocEntryTable[Index], NextStart);
2252 NextStart = LoadedSLocEntryTable[Index].getOffset();
2253 } else {
2254 NextStart = std::nullopt;
2255 }
2256 }
2257}
2258
2260 DiagnosticsEngine &Diag, std::optional<unsigned> MaxNotes) const {
     // Reports, as notes on Diag, how the source-location address space is
     // used: one note with the totals, one note per file for the largest
     // consumers (at most *MaxNotes of them when MaxNotes is set), and a final
     // note for whatever usage was not reported per file.
2261 struct Info {
2262 // A location where this file was entered.
2264 // Number of times this FileEntry was entered.
2265 unsigned Inclusions = 0;
2266 // Size usage from the file itself.
2267 uint64_t DirectSize = 0;
2268 // Total size usage from the file and its macro expansions.
2269 uint64_t TotalSize = 0;
2270 };
2271 using UsageMap = llvm::MapVector<const FileEntry*, Info>;
2272
2273 UsageMap Usage;
2274 uint64_t CountedSize = 0;
2275
     // Attributes the address space taken by one SLocEntry to the file it
     // belongs to, directly or through macro expansion.
2276 auto AddUsageForFileID = [&](FileID ID) {
2277 // The +1 here is because getFileIDSize doesn't include the extra byte for
2278 // the one-past-the-end location.
2279 unsigned Size = getFileIDSize(ID) + 1;
2280
2281 // Find the file that used this address space, either directly or by
2282 // macro expansion.
2283 SourceLocation FileStart = getFileLoc(getComposedLoc(ID, 0));
2284 FileID FileLocID = getFileID(FileStart);
2285 const FileEntry *Entry = getFileEntryForID(FileLocID);
2286
2287 Info &EntryInfo = Usage[Entry];
2288 if (EntryInfo.Loc.isInvalid())
2289 EntryInfo.Loc = FileStart;
     // Only an entry that is the file itself counts as an inclusion and as
     // direct usage; everything is added to the total.
2290 if (ID == FileLocID) {
2291 ++EntryInfo.Inclusions;
2292 EntryInfo.DirectSize += Size;
2293 }
2294 EntryInfo.TotalSize += Size;
2295 CountedSize += Size;
2296 };
2297
2298 // Loaded SLocEntries have indexes counting downwards from -2.
2299 for (size_t Index = 0; Index != LoadedSLocEntryTable.size(); ++Index) {
2300 AddUsageForFileID(FileID::get(-2 - Index));
2301 }
2302 // Local SLocEntries have indexes counting upwards from 0.
2303 for (size_t Index = 0; Index != LocalSLocEntryTable.size(); ++Index) {
2304 AddUsageForFileID(FileID::get(Index));
2305 }
2306
2307 // Sort the usage by size from largest to smallest. Break ties by raw source
2308 // location.
2309 auto SortedUsage = Usage.takeVector();
2310 auto Cmp = [](const UsageMap::value_type &A, const UsageMap::value_type &B) {
2311 return A.second.TotalSize > B.second.TotalSize ||
2312 (A.second.TotalSize == B.second.TotalSize &&
2313 A.second.Loc < B.second.Loc);
2314 };
     // When only the top *MaxNotes entries are needed, nth_element first moves
     // them to the front so that just that prefix has to be sorted.
2315 auto SortedEnd = SortedUsage.end();
2316 if (MaxNotes && SortedUsage.size() > *MaxNotes) {
2317 SortedEnd = SortedUsage.begin() + *MaxNotes;
2318 std::nth_element(SortedUsage.begin(), SortedEnd, SortedUsage.end(), Cmp);
2319 }
2320 std::sort(SortedUsage.begin(), SortedEnd, Cmp);
2321
2322 // Produce note on sloc address space usage total.
2323 uint64_t LocalUsage = NextLocalOffset;
2324 uint64_t LoadedUsage = MaxLoadedOffset - CurrentLoadedOffset;
     // Percentage of MaxLoadedOffset, truncated towards zero.
2325 int UsagePercent = static_cast<int>(100.0 * double(LocalUsage + LoadedUsage) /
2326 MaxLoadedOffset);
2327 Diag.Report(diag::note_total_sloc_usage)
2328 << LocalUsage << LoadedUsage << (LocalUsage + LoadedUsage)
2329 << UsagePercent;
2330
2331 // Produce notes on sloc address space usage for each file with a high usage.
2332 uint64_t ReportedSize = 0;
2333 for (auto &[Entry, FileInfo] :
2334 llvm::make_range(SortedUsage.begin(), SortedEnd)) {
2335 Diag.Report(FileInfo.Loc, diag::note_file_sloc_usage)
2336 << FileInfo.Inclusions << FileInfo.DirectSize
2337 << (FileInfo.TotalSize - FileInfo.DirectSize);
2338 ReportedSize += FileInfo.TotalSize;
2339 }
2340
2341 // Describe any remaining usage not reported in the per-file usage.
2342 if (ReportedSize != CountedSize) {
2343 Diag.Report(diag::note_file_misc_sloc_usage)
2344 << (SortedUsage.end() - SortedEnd) << CountedSize - ReportedSize;
2345 }
2346}
2347
2349
2350/// Return the amount of memory used by memory buffers, breaking down
2351/// by heap-backed versus mmap'ed memory.
2353 size_t malloc_bytes = 0;
2354 size_t mmap_bytes = 0;
2355
     // Buffers that report zero mapped bytes are skipped; the others are added
     // to the total matching their buffer kind.
2356 for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i)
2357 if (size_t sized_mapped = MemBufferInfos[i]->getSizeBytesMapped())
2358 switch (MemBufferInfos[i]->getMemoryBufferKind()) {
2359 case llvm::MemoryBuffer::MemoryBuffer_MMap:
2360 mmap_bytes += sized_mapped;
2361 break;
2362 case llvm::MemoryBuffer::MemoryBuffer_Malloc:
2363 malloc_bytes += sized_mapped;
2364 break;
2365 }
2366
2367 return MemoryBufferSizes(malloc_bytes, mmap_bytes);
2368}
2369
     // Sums llvm::capacity_in_bytes over the source manager's side tables.
2371 size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
2372 llvm::capacity_in_bytes(LocalSLocEntryTable) +
2373 llvm::capacity_in_bytes(LoadedSLocEntryTable) +
2374 llvm::capacity_in_bytes(SLocEntryLoaded) +
2375 llvm::capacity_in_bytes(FileInfos);
2376
     // The overridden-files table is counted only when OverriddenFilesInfo is
     // set.
2377 if (OverriddenFilesInfo)
2378 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
2379
2380 return size;
2381}
2382
2384 StringRef Content) {
     // Builds a FileManager, DiagnosticsEngine and SourceManager around an
     // in-memory file system that contains one file, FileName, whose contents
     // are Content, and makes that file the main file.
2385 // This is referenced by `FileMgr` and will be released by `FileMgr` when it
2386 // is deleted.
2388 new llvm::vfs::InMemoryFileSystem);
     // The memory buffer is created over Content without requiring a null
     // terminator.
2389 InMemoryFileSystem->addFile(
2390 FileName, 0,
2391 llvm::MemoryBuffer::getMemBuffer(Content, FileName,
2392 /*RequiresNullTerminator=*/false));
2393 // This is passed to `SM` as reference, so the pointer has to be referenced
2394 // in `Environment` so that `FileMgr` can out-live this function scope.
2395 FileMgr =
2396 std::make_unique<FileManager>(FileSystemOptions(), InMemoryFileSystem);
2397 // This is passed to `SM` as reference, so the pointer has to be referenced
2398 // by `Environment` due to the same reason above.
2399 Diagnostics = std::make_unique<DiagnosticsEngine>(
2401 new DiagnosticOptions);
2402 SourceMgr = std::make_unique<SourceManager>(*Diagnostics, *FileMgr);
     // cantFail: the lookup is expected to succeed because the file was just
     // added above.
2403 FileEntryRef FE = llvm::cantFail(FileMgr->getFileRef(FileName));
     // The file is created with a default (invalid) include location and
     // classified as user code.
2404 FileID ID =
2405 SourceMgr->createFileID(FE, SourceLocation(), clang::SrcMgr::C_User);
2406 assert(ID.isValid());
2407 SourceMgr->setMainFileID(ID);
2408}
#define SM(sm)
Definition: Cuda.cpp:84
Defines the Diagnostic-related interfaces.
Expr * E
Defines the clang::FileManager interface and associated types.
StringRef Filename
Definition: Format.cpp:3032
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
static ParseState advance(ParseState S, size_t N)
Definition: Parsing.cpp:144
SourceLocation Loc
Definition: SemaObjC.cpp:759
Defines the clang::SourceLocation class and associated facilities.
Defines implementation details of the clang::SourceManager class.
static constexpr T likelyhasbetween(T x, unsigned char m, unsigned char n)
static bool MoveUpTranslationUnitIncludeHierarchy(std::pair< FileID, unsigned > &Loc, const SourceManager &SM)
Given a decomposed source location, move it up the include/expansion stack to the parent source locat...
static bool isInvalid(LocType Loc, bool *Invalid)
STATISTIC(MaxUsedSLocBytes, "Maximum number of bytes used by source locations " "(both loaded and local).")
bool needConversion(StringRef Filename)
Helper function to determine if an input file requires conversion.
Defines the SourceManager interface.
__device__ double
__device__ int
#define bool
Definition: amdgpuintrin.h:20
Represents a character-granular source range.
void setEnd(SourceLocation e)
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
void setBegin(SourceLocation b)
SourceLocation getEnd() const
SourceLocation getBegin() const
void setTokenRange(bool TR)
Used for handling and querying diagnostic IDs.
Options for controlling the compiler diagnostics engine.
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
void setSourceManager(SourceManager *SrcMgr)
Definition: Diagnostic.h:612
virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset)=0
Get the index ID for the loaded SourceLocation offset.
virtual bool ReadSLocEntry(int ID)=0
Read the source location entry with index ID, which will always be less than -1.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition: FileEntry.h:57
bool isNamedPipe() const
Definition: FileEntry.h:359
off_t getSize() const
Definition: FileEntry.h:347
StringRef getName() const
The name of this FileEntry.
Definition: FileEntry.h:61
Cached information about one file (either on disk or in the virtual file system).
Definition: FileEntry.h:305
unsigned getUID() const
Definition: FileEntry.h:333
off_t getSize() const
Definition: FileEntry.h:330
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isValid() const
bool isInvalid() const
Implements support for file system lookup, file system caching, and directory search management.
Definition: FileManager.h:53
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(FileEntryRef Entry, bool isVolatile=false, bool RequiresNullTerminator=true, std::optional< int64_t > MaybeLimit=std::nullopt, bool IsText=true)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
OptionalFileEntryRef getBypassFile(FileEntryRef VFE)
Retrieve a FileEntry that bypasses VFE, which is expected to be a virtual file entry,...
Keeps track of options that affect how file operations are performed.
Holds the cache used by isBeforeInTranslationUnit.
void setCommonLoc(FileID commonFID, unsigned lCommonOffset, unsigned rCommonOffset, bool LParentBeforeRParent)
bool getCachedResult(unsigned LOffset, unsigned ROffset) const
If the cache is valid, compute the result given the specified offsets in the LHS/RHS FileID's.
bool isCacheValid() const
Return true if the currently cached values match up with the specified LHS/RHS query.
Used to hold and unique data used to represent #line information.
const LineEntry * FindNearestLineEntry(FileID FID, unsigned Offset)
Find the line entry nearest to FID that is before it.
unsigned getLineTableFilenameID(StringRef Str)
void AddEntry(FileID FID, const std::vector< LineEntry > &Entries)
Add a new line entry that has already been encoded into the internal representation of the line table...
void AddLineNote(FileID FID, unsigned Offset, unsigned LineNo, int FilenameID, unsigned EntryExit, SrcMgr::CharacteristicKind FileKind)
Add a line note to the line table that indicates that there is a #line or GNU line marker at the spec...
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
unsigned getLine() const
Return the presumed line number of this location.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
SourceManagerForFile(StringRef FileName, StringRef Content)
Creates SourceManager and necessary dependencies (e.g.
This class handles loading and caching of source files into memory.
std::optional< StringRef > getNonBuiltinFilenameForID(FileID FID) const
Returns the filename for the provided FileID, unless it's a built-in buffer that's not represented by...
bool isMacroBodyExpansion(SourceLocation Loc) const
Tests whether the given source location represents the expansion of a macro body.
unsigned getPresumedLineNumber(SourceLocation Loc, bool *Invalid=nullptr) const
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
unsigned getColumnNumber(FileID FID, unsigned FilePos, bool *Invalid=nullptr) const
Return the column # for the specified file position.
void noteSLocAddressSpaceUsage(DiagnosticsEngine &Diag, std::optional< unsigned > MaxNotes=32) const
bool isInMainFile(SourceLocation Loc) const
Returns whether the PresumedLoc for a given SourceLocation is in the main file.
void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID, bool IsFileEntry, bool IsFileExit, SrcMgr::CharacteristicKind FileKind)
Add a line note to the line table for the FileID and offset specified by Loc.
SourceManager(DiagnosticsEngine &Diag, FileManager &FileMgr, bool UserFilesAreVolatile=false)
SourceLocation createTokenSplitLoc(SourceLocation SpellingLoc, SourceLocation TokenStart, SourceLocation TokenEnd)
Return a new SourceLocation that encodes that the token starting at TokenStart ends prematurely at To...
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
MemoryBufferSizes getMemoryBufferSizes() const
Return the amount of memory used by memory buffers, breaking down by heap-backed versus mmap'ed memor...
void setFileIsTransient(FileEntryRef SourceFile)
Specify that a file is transient.
bool isFileOverridden(const FileEntry *File) const
Returns true if the file contents have been overridden.
OptionalFileEntryRef getFileEntryRefForID(FileID FID) const
Returns the FileEntryRef for the provided FileID.
SourceLocation translateLineCol(FileID FID, unsigned Line, unsigned Col) const
Get the source location in FID for the given line:col.
StringRef getBufferName(SourceLocation Loc, bool *Invalid=nullptr) const
Return the filename or buffer identifier of the buffer the location is in.
SourceLocation getTopMacroCallerLoc(SourceLocation Loc) const
std::optional< StringRef > getBufferDataOrNone(FileID FID) const
Return a StringRef to the source buffer data for the specified FileID, returning std::nullopt if inva...
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
FileID translateFile(const FileEntry *SourceFile) const
Get the FileID for the given file.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
FileID createFileID(FileEntryRef SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Create a new FileID that represents the specified file being #included from the specified IncludePosi...
void PrintStats() const
Print statistics to stderr.
FileID getUniqueLoadedASTFileID(SourceLocation Loc) const
bool isMainFile(const FileEntry &SourceFile)
Returns true when the given FileEntry corresponds to the main file.
size_t getDataStructureSizes() const
Return the amount of memory used for various side tables and data structures in the SourceManager.
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
bool isInTheSameTranslationUnitImpl(const std::pair< FileID, unsigned > &LOffs, const std::pair< FileID, unsigned > &ROffs) const
Determines whether the two decomposed source locations are in the same TU.
const SrcMgr::SLocEntry & getLocalSLocEntry(unsigned Index) const
Get a local SLocEntry. This is exposed for indexing.
OptionalFileEntryRef bypassFileContentsOverride(FileEntryRef File)
Bypass the overridden contents of a file.
FileManager & getFileManager() const
unsigned local_sloc_entry_size() const
Get the number of local SLocEntries we have.
std::optional< StringRef > getBufferDataIfLoaded(FileID FID) const
Return a StringRef to the source buffer data for the specified FileID, returning std::nullopt if it's...
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
std::pair< int, SourceLocation::UIntTy > AllocateLoadedSLocEntries(unsigned NumSLocEntries, SourceLocation::UIntTy TotalSize)
Allocate a number of loaded SLocEntries, which will be actually loaded on demand from the external so...
void overrideFileContents(FileEntryRef SourceFile, const llvm::MemoryBufferRef &Buffer)
Override the contents of the given source file by providing an already-allocated buffer.
unsigned getSpellingLineNumber(SourceLocation Loc, bool *Invalid=nullptr) const
unsigned getFileIDSize(FileID FID) const
The size of the SLocEntry that FID represents.
unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid=nullptr) const
Given a SourceLocation, return the spelling line number for the position indicated.
std::pair< bool, bool > isInTheSameTranslationUnit(std::pair< FileID, unsigned > &LOffs, std::pair< FileID, unsigned > &ROffs) const
Determines whether the two decomposed source locations are in the same translation unit.
llvm::DenseMap< FileEntryRef, SrcMgr::ContentCache * >::const_iterator fileinfo_iterator
CharSourceRange getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
CharSourceRange getExpansionRange(SourceLocation Loc) const
Given a SourceLocation object, return the range of tokens covered by the expansion in the ultimate fi...
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
unsigned getLineTableFilenameID(StringRef Str)
Return the uniqued ID for the specified filename.
std::pair< FileID, unsigned > getDecomposedExpansionLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
unsigned getExpansionLineNumber(SourceLocation Loc, bool *Invalid=nullptr) const
void initializeForReplay(const SourceManager &Old)
Initialize this source manager suitably to replay the compilation described by Old.
FileID getOrCreateFileID(FileEntryRef SourceFile, SrcMgr::CharacteristicKind FileCharacter)
Get the FileID for SourceFile if it exists.
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
std::pair< FileID, unsigned > getDecomposedSpellingLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion.
SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const
Return the file characteristic of the specified source location, indicating whether this is a normal ...
SourceLocation createExpansionLoc(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned Length, bool ExpansionIsTokenRange=true, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Creates an expansion SLocEntry for a macro use.
unsigned getPresumedColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
std::pair< FileID, unsigned > getDecomposedIncludedLoc(FileID FID) const
Returns the "included/expanded in" decomposed location of the given FileID.
StringRef getFilename(SourceLocation SpellingLoc) const
Return the filename of the file containing a SourceLocation.
SourceLocation getMacroArgExpandedLocation(SourceLocation Loc) const
If Loc points inside a function macro argument, the returned location will be the macro location in w...
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
std::optional< llvm::MemoryBufferRef > getBufferOrNone(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
LineTableInfo & getLineTable()
Retrieve the stored line table.
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID.
std::optional< llvm::MemoryBufferRef > getMemoryBufferForFileOrNone(FileEntryRef File)
Retrieve the memory buffer associated with the given file.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
SourceLocation createMacroArgExpansionLoc(SourceLocation SpellingLoc, SourceLocation ExpansionLoc, unsigned Length)
Creates an expansion SLocEntry for the substitution of an argument into a function-like macro's body.
A trivial tuple used to represent a source range.
One instance of this struct is kept for every file loaded or used.
void setBuffer(std::unique_ptr< llvm::MemoryBuffer > B)
Set the buffer.
std::optional< StringRef > getBufferDataIfLoaded() const
Return a StringRef to the source buffer data, only if it has already been loaded.
OptionalFileEntryRef ContentsEntry
References the file which the contents were actually loaded from.
unsigned getSizeBytesMapped() const
Returns the number of bytes actually mapped for this ContentCache.
unsigned IsTransient
True if this file may be transient, that is, if it might not exist at some later point in time when t...
unsigned getSize() const
Returns the size of the content encapsulated by this ContentCache.
llvm::MemoryBuffer::BufferKind getMemoryBufferKind() const
Returns the kind of memory used to back the memory buffer for this content cache.
unsigned IsFileVolatile
True if this content cache was initially created for a source file considered to be volatile (likely ...
LineOffsetMapping SourceLineCache
A bump pointer allocated array of offsets for each source line.
std::optional< llvm::MemoryBufferRef > getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM, SourceLocation Loc=SourceLocation()) const
Returns the memory buffer for the associated content.
static const char * getInvalidBOM(StringRef BufStr)
unsigned BufferOverridden
Indicates whether the buffer itself was provided to override the actual file contents.
OptionalFileEntryRef OrigEntry
Reference to the file entry representing this ContentCache.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
static ExpansionInfo create(SourceLocation SpellingLoc, SourceLocation Start, SourceLocation End, bool ExpansionIsTokenRange=true)
Return an ExpansionInfo for an expansion.
SourceLocation getSpellingLoc() const
CharSourceRange getExpansionLocRange() const
static ExpansionInfo createForMacroArg(SourceLocation SpellingLoc, SourceLocation ExpansionLoc)
Return a special ExpansionInfo for the expansion of a macro argument into a function-like macro's bod...
static ExpansionInfo createForTokenSplit(SourceLocation SpellingLoc, SourceLocation Start, SourceLocation End)
Return a special ExpansionInfo representing a token that ends prematurely.
SourceLocation getExpansionLocEnd() const
Information about a FileID, basically just the logical file that it represents and include stack info...
const ContentCache & getContentCache() const
CharacteristicKind getFileCharacteristic() const
Return whether this is a system header or not.
static FileInfo get(SourceLocation IL, ContentCache &Con, CharacteristicKind FileCharacter, StringRef Filename)
Return a FileInfo object.
bool hasLineDirectives() const
Return true if this FileID has #line directives in it.
void setHasLineDirectives()
Set the flag that indicates that this FileID has line table entries associated with it.
SourceLocation getIncludeLoc() const
StringRef getName() const
Returns the name of the file that was used when the file was loaded from the underlying file system.
Mapping of line offsets into a source file.
const unsigned * begin() const
const unsigned * end() const
static LineOffsetMapping get(llvm::MemoryBufferRef Buffer, llvm::BumpPtrAllocator &Alloc)
This is a discriminated union of FileInfo and ExpansionInfo.
SourceLocation::UIntTy getOffset() const
static SLocEntry get(SourceLocation::UIntTy Offset, const FileInfo &FI)
const FileInfo & getFile() const
const ExpansionInfo & getExpansion() const
The type-property cache.
Definition: Type.cpp:4501
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:81
bool isSystem(CharacteristicKind CK)
Determine whether a file / directory characteristic is for system code.
Definition: SourceManager.h:90
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
SrcMgr::CharacteristicKind FileKind
Set to 0 if no flags, 1 if a system header.
static LineEntry get(unsigned Offs, unsigned Line, int Filename, SrcMgr::CharacteristicKind FileKind, unsigned IncludeOffset)