13#ifndef LLVM_CLANG_LEX_LEXER_H
14#define LLVM_CLANG_LEX_LEXER_H
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
37class DiagnosticBuilder;
81 void anchor()
override;
87 const char *BufferStart;
90 const char *BufferEnd;
122 unsigned char ExtendedTokenMode;
131 const char *BufferPtr;
135 bool IsAtStartOfLine;
137 bool IsAtPhysicalStartOfLine;
139 bool HasLeadingSpace;
141 bool HasLeadingEmptyMacro;
144 bool IsFirstTimeLexingFile;
148 const char *NewLinePtr;
158 unsigned NextDepDirectiveTokenIndex = 0;
160 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
168 bool IsFirstIncludeOfFile =
true);
174 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
175 bool IsFirstIncludeOfFile =
true);
182 bool IsFirstIncludeOfFile =
true);
211 bool LexDependencyDirectiveTokenWhileSkipping(
Token &
Result);
/// Reports whether DepDirectives is non-empty.
///
/// \returns true when DepDirectives holds at least one entry, false when it
/// is empty.
215 bool isDependencyDirectivesLexer()
const {
return !DepDirectives.empty(); }
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &
Result);
241 return BufferPtr == BufferEnd;
249 return ExtendedTokenMode > 1;
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
263 return ExtendedTokenMode > 0;
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
285 return StringRef(BufferStart, BufferEnd - BufferStart);
312 assert(BufferPtr >= BufferStart &&
"Invalid buffer state");
313 return BufferPtr - BufferStart;
/// Set the lexer's buffer pointer to \p Offset.
///
/// \param Offset the position to move the buffer pointer to.
/// \param IsAtStartOfLine NOTE(review): presumably tells the lexer whether
/// the new position is at the start of a line; the definition is not visible
/// here, so confirm against the implementation.
317 void seek(
unsigned Offset,
bool IsAtStartOfLine);
/// Stringify - Convert the specified string into a C string; this includes
/// escaping '\' and " characters.
///
/// \param Str the string to convert.
/// \param Charify defaults to false. NOTE(review): its effect is not visible
/// from this declaration; confirm against the implementation.
/// \returns the converted string.
322 static std::string
Stringify(StringRef Str,
bool Charify =
false);
365 bool *invalid =
nullptr);
380 bool IgnoreWhiteSpace =
false);
436 Range.getBegin(), End);
441 return Range.isTokenRange()
550 unsigned MaxLines = 0);
558 bool IncludeComments =
false);
569 bool SkipTrailingWhitespaceAndNewLine);
591 if (isObviouslySimpleCharacter(Ptr[0])) {
595 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
613 bool LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine);
615 bool CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
const char *CurPtr);
617 bool LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
const char *CurPtr);
624 void FormTokenWithChars(
Token &
Result,
const char *TokEnd,
626 unsigned TokLen = TokEnd-BufferPtr;
636 unsigned isNextPPTokenLParen();
660 static bool isObviouslySimpleCharacter(
char C) {
661 return C !=
'?' &&
C !=
'\\';
668 inline char getAndAdvanceChar(
const char *&Ptr, Token &Tok) {
671 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
673 auto [
C,
Size] = getCharAndSizeSlow(Ptr, &Tok);
682 const char *ConsumeChar(
const char *Ptr,
unsigned Size, Token &Tok) {
689 return Ptr + getCharAndSizeSlow(Ptr, &Tok).
Size;
696 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
699 if (isObviouslySimpleCharacter(Ptr[0])) {
704 auto CharAndSize = getCharAndSizeSlow(Ptr);
705 Size = CharAndSize.Size;
706 return CharAndSize.Char;
711 SizedChar getCharAndSizeSlow(
const char *Ptr, Token *Tok =
nullptr);
716 static unsigned getEscapedNewLineSize(
const char *
P);
721 static const char *SkipEscapedNewLines(
const char *
P);
725 static SizedChar getCharAndSizeSlowNoWarn(
const char *Ptr,
726 const LangOptions &LangOpts);
731 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
733 void PropagateLineStartLeadingSpaceInfo(Token &
Result);
735 const char *LexUDSuffix(Token &
Result,
const char *CurPtr,
736 bool IsStringLiteral);
742 bool LexIdentifierContinue(Token &
Result,
const char *CurPtr);
744 bool LexNumericConstant (Token &
Result,
const char *CurPtr);
745 bool LexStringLiteral (Token &
Result,
const char *CurPtr,
747 bool LexRawStringLiteral (Token &
Result,
const char *CurPtr,
749 bool LexAngledStringLiteral(Token &
Result,
const char *CurPtr);
750 bool LexCharConstant (Token &
Result,
const char *CurPtr,
752 bool LexEndOfFile (Token &
Result,
const char *CurPtr);
753 bool SkipWhitespace (Token &
Result,
const char *CurPtr,
754 bool &TokAtPhysicalStartOfLine);
755 bool SkipLineComment (Token &
Result,
const char *CurPtr,
756 bool &TokAtPhysicalStartOfLine);
757 bool SkipBlockComment (Token &
Result,
const char *CurPtr,
758 bool &TokAtPhysicalStartOfLine);
759 bool SaveLineComment (Token &
Result,
const char *CurPtr);
761 bool IsStartOfConflictMarker(
const char *CurPtr);
762 bool HandleEndOfConflictMarker(
const char *CurPtr);
764 bool lexEditorPlaceholder(Token &
Result,
const char *CurPtr);
766 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// Moves BufferPtr to BufferEnd, leaving no buffer content between the
/// current position and the end of the buffer.
767 void cutOffLexing() { BufferPtr = BufferEnd; }
769 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
771 void codeCompleteIncludedFile(
const char *PathStart,
772 const char *CompletionPoint,
bool IsAngled);
774 std::optional<uint32_t>
775 tryReadNumericUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
776 std::optional<uint32_t> tryReadNamedUCN(
const char *&StartPtr,
777 const char *SlashLoc, Token *
Result);
791 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
804 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
813 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr, Token &
Result);
enum clang::sema::@1724::IndirectLocalPathEntry::EntryKind Kind
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::LangOptions interface.
Defines the PreprocessorLexer interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
__device__ __2f16 float c
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
StringRef getBuffer() const
Gets source code buffer.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Lexer & operator=(const Lexer &)=delete
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
Lexer(const Lexer &)=delete
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
bool LexingRawMode
True if in raw mode.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
@ Result
The result type of a method or function.
Diagnostic wrappers for TextAPI types for error reporting.
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
unsigned Size
Size of the preamble in bytes.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)