13#ifndef LLVM_CLANG_LEX_LEXER_H
14#define LLVM_CLANG_LEX_LEXER_H
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
37class DiagnosticBuilder;
/// Overrides a virtual anchor() from a base class that is not visible in this
/// excerpt.
/// NOTE(review): presumably the out-of-line virtual method that pins this
/// class's vtable to one translation unit -- confirm against the .cpp file.
81 void anchor()
override;
/// Start of the character buffer being lexed. The visible code forms the
/// buffer's StringRef from (BufferStart, BufferEnd - BufferStart).
87 const char *BufferStart;
/// End of the character buffer; BufferPtr == BufferEnd is tested elsewhere in
/// this excerpt, and cutOffLexing() moves BufferPtr here.
90 const char *BufferEnd;
/// Token-retention mode. The visible assignments store 0, 1 (comment retention
/// requested) or 2 (whitespace retention requested); the visible queries test
/// `> 0` and `> 1`, so mode 2 also satisfies the `> 0` check.
122 unsigned char ExtendedTokenMode;
/// Current position within the buffer. The visible code asserts
/// BufferPtr >= BufferStart and reports BufferPtr - BufferStart as an offset.
131 const char *BufferPtr;
/// NOTE(review): presumably true when the next token starts a (logical)
/// line -- inferred from the name only; confirm.
135 bool IsAtStartOfLine;
/// NOTE(review): presumably the physical-line counterpart of IsAtStartOfLine
/// -- confirm.
137 bool IsAtPhysicalStartOfLine;
/// NOTE(review): presumably records whitespace seen before the next token --
/// confirm.
139 bool HasLeadingSpace;
/// NOTE(review): presumably records an empty macro expansion seen before the
/// next token -- confirm.
141 bool HasLeadingEmptyMacro;
/// NOTE(review): presumably backs isFirstTimeLexingFile(), which is summarised
/// as "check if this is the first time we're lexing the input file" -- confirm.
144 bool IsFirstTimeLexingFile;
/// NOTE(review): presumably points at a newline character in the buffer; what
/// it caches is not visible here -- confirm.
148 const char *NewLinePtr;
/// Starts at 0. NOTE(review): presumably the index of the next token to take
/// from the dependency directives -- confirm.
158 unsigned NextDepDirectiveTokenIndex = 0;
/// Initialises the lexer from three pointers into a character buffer.
/// NOTE(review): going by the parameter names, BufStart/BufEnd delimit the
/// buffer and BufPtr is the position lexing starts from; the definition is not
/// visible here, so confirm before relying on this.
160 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
168 bool IsFirstIncludeOfFile =
true);
174 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
175 bool IsFirstIncludeOfFile =
true);
182 bool IsFirstIncludeOfFile =
true);
/// Takes the token to fill in by reference and returns a bool.
/// NOTE(review): presumably lexes the next token from the pre-scanned
/// dependency directives while a skipped region is being processed; the
/// meaning of the return value is not visible here -- confirm.
211 bool LexDependencyDirectiveTokenWhileSkipping(
Token &
Result);
/// Returns true if and only if DepDirectives is non-empty. Does not modify the
/// lexer (const member).
215 bool isDependencyDirectivesLexer()
const {
return !DepDirectives.empty(); }
/// Converts a dependency_directives_scan::Token (DDTok) into the lexer Token
/// passed as Result.
/// NOTE(review): the returned pointer is presumably a position in the source
/// buffer associated with the token -- confirm against the definition.
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &
Result);
241 return BufferPtr == BufferEnd;
249 return ExtendedTokenMode > 1;
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
263 return ExtendedTokenMode > 0;
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
285 return StringRef(BufferStart, BufferEnd - BufferStart);
312 assert(BufferPtr >= BufferStart &&
"Invalid buffer state");
313 return BufferPtr - BufferStart;
/// Set the lexer's buffer pointer to \p Offset.
/// NOTE(review): \p IsAtStartOfLine presumably becomes the lexer's
/// start-of-line state after the move -- confirm against the definition.
317 void seek(
unsigned Offset,
bool IsAtStartOfLine);
/// Stringify - Convert \p Str into a C string; the summary for this function
/// mentions escaping '\' and '"' characters as one of the steps.
/// \p Charify defaults to false. NOTE(review): presumably it switches the
/// escaping to character-literal rules -- confirm against the definition.
322 static std::string
Stringify(StringRef Str,
bool Charify =
false);
365 bool *invalid =
nullptr);
380 bool IgnoreWhiteSpace =
false);
436 Range.getBegin(), End);
441 return Range.isTokenRange()
550 unsigned MaxLines = 0);
568 bool SkipTrailingWhitespaceAndNewLine);
590 if (isObviouslySimpleCharacter(Ptr[0])) {
594 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
/// Internal lexing entry point that fills in \p Result.
/// NOTE(review): \p TokAtPhysicalStartOfLine presumably tells the routine
/// whether the token begins a physical line; return-value meaning is not
/// visible here -- confirm.
612 bool LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine);
/// Takes a 32-bit code point \p C together with the current buffer position.
/// NOTE(review): presumably reports whether \p C is a Unicode whitespace
/// character (and diagnoses it) -- confirm.
614 bool CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
const char *CurPtr);
/// Takes a 32-bit code point \p C together with the current buffer position.
/// NOTE(review): presumably attempts to start lexing an identifier whose
/// first character is the non-ASCII code point \p C -- confirm.
616 bool LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
const char *CurPtr);
623 void FormTokenWithChars(
Token &
Result,
const char *TokEnd,
625 unsigned TokLen = TokEnd-BufferPtr;
/// Note the return type is unsigned rather than bool.
/// NOTE(review): presumably a multi-valued answer to "is the next preprocessor
/// token a '('?" -- the value encoding is not visible here; confirm.
635 unsigned isNextPPTokenLParen();
659 static bool isObviouslySimpleCharacter(
char C) {
660 return C !=
'?' &&
C !=
'\\';
667 inline char getAndAdvanceChar(
const char *&Ptr, Token &Tok) {
670 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
672 auto [
C,
Size] = getCharAndSizeSlow(Ptr, &Tok);
681 const char *ConsumeChar(
const char *Ptr,
unsigned Size, Token &Tok) {
688 return Ptr + getCharAndSizeSlow(Ptr, &Tok).
Size;
695 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
698 if (isObviouslySimpleCharacter(Ptr[0])) {
703 auto CharAndSize = getCharAndSizeSlow(Ptr);
704 Size = CharAndSize.Size;
705 return CharAndSize.Char;
/// Slow path behind getAndAdvanceChar(), ConsumeChar() and getCharAndSize():
/// those callers use it when the character at \p Ptr is not "obviously simple"
/// (i.e. it is '?' or '\\'). Returns a SizedChar whose Char and Size members
/// the callers read. \p Tok is optional and defaults to nullptr.
710 SizedChar getCharAndSizeSlow(
const char *Ptr, Token *Tok =
nullptr);
/// NOTE(review): presumably returns the byte length of an escaped newline
/// starting at \p P (0 if there is none) -- confirm against the definition.
715 static unsigned getEscapedNewLineSize(
const char *
P);
/// NOTE(review): presumably returns \p P advanced past any escaped newlines
/// -- confirm against the definition.
720 static const char *SkipEscapedNewLines(
const char *
P);
/// Static slow path that takes LangOptions explicitly instead of using lexer
/// state; called when the character at Ptr is not "obviously simple".
/// NOTE(review): per the getCharAndSizeNoWarn summary this family never emits
/// a warning -- confirm that holds for this helper too.
724 static SizedChar getCharAndSizeSlowNoWarn(
const char *Ptr,
725 const LangOptions &LangOpts);
/// NOTE(review): presumably repositions the lexer at byte \p Offset and sets
/// its start-of-line state from \p StartOfLine -- confirm.
730 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
/// NOTE(review): presumably copies the lexer's start-of-line / leading-space
/// state onto \p Result -- confirm.
732 void PropagateLineStartLeadingSpaceInfo(Token &
Result);
/// NOTE(review): presumably lexes a user-defined-literal suffix at \p CurPtr
/// and returns the position after it; \p IsStringLiteral distinguishes string
/// from other literal kinds -- confirm.
734 const char *LexUDSuffix(Token &
Result,
const char *CurPtr,
735 bool IsStringLiteral);
/// NOTE(review): presumably continues lexing an identifier from \p CurPtr
/// into \p Result -- confirm.
741 bool LexIdentifierContinue(Token &
Result,
const char *CurPtr);
/// NOTE(review): presumably lexes a numeric constant from \p CurPtr into
/// \p Result -- confirm.
743 bool LexNumericConstant (Token &
Result,
const char *CurPtr);
744 bool LexStringLiteral (Token &
Result,
const char *CurPtr,
746 bool LexRawStringLiteral (Token &
Result,
const char *CurPtr,
/// NOTE(review): presumably lexes an angle-bracketed string (as used by
/// #include <...>) from \p CurPtr into \p Result -- confirm.
748 bool LexAngledStringLiteral(Token &
Result,
const char *CurPtr);
749 bool LexCharConstant (Token &
Result,
const char *CurPtr,
/// NOTE(review): presumably handles reaching the end of the buffer at
/// \p CurPtr -- confirm.
751 bool LexEndOfFile (Token &
Result,
const char *CurPtr);
/// The three Skip* routines below take \p TokAtPhysicalStartOfLine by
/// non-const reference, so they are able to update the caller's flag.
/// NOTE(review): presumably skips whitespace starting at \p CurPtr -- confirm.
752 bool SkipWhitespace (Token &
Result,
const char *CurPtr,
753 bool &TokAtPhysicalStartOfLine);
/// NOTE(review): presumably skips a line comment starting at \p CurPtr --
/// confirm.
754 bool SkipLineComment (Token &
Result,
const char *CurPtr,
755 bool &TokAtPhysicalStartOfLine);
/// NOTE(review): presumably skips a block comment starting at \p CurPtr --
/// confirm.
756 bool SkipBlockComment (Token &
Result,
const char *CurPtr,
757 bool &TokAtPhysicalStartOfLine);
/// NOTE(review): presumably turns a line comment into a token in \p Result
/// when comments are being retained -- confirm.
758 bool SaveLineComment (Token &
Result,
const char *CurPtr);
/// NOTE(review): presumably detects the start of a version-control conflict
/// marker (see ConflictMarkerKind) at \p CurPtr -- confirm.
760 bool IsStartOfConflictMarker(
const char *CurPtr);
/// NOTE(review): presumably handles the terminating conflict marker at
/// \p CurPtr -- confirm.
761 bool HandleEndOfConflictMarker(
const char *CurPtr);
/// NOTE(review): presumably lexes an editor placeholder at \p CurPtr into
/// \p Result -- confirm.
763 bool lexEditorPlaceholder(Token &
Result,
const char *CurPtr);
/// Const query on \p CurPtr; does not modify the lexer.
/// NOTE(review): presumably true when \p CurPtr is the code-completion
/// position -- confirm.
765 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// Moves BufferPtr to BufferEnd, so that the BufferPtr == BufferEnd
/// end-of-buffer condition holds afterwards.
766 void cutOffLexing() { BufferPtr = BufferEnd; }
/// NOTE(review): presumably reports whether the text at \p Start begins a
/// hexadecimal literal under \p LangOpts -- confirm.
768 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
/// NOTE(review): presumably triggers code completion for the include path
/// spanning \p PathStart to \p CompletionPoint; \p IsAngled distinguishes
/// <...> from "..." -- confirm.
770 void codeCompleteIncludedFile(
const char *PathStart,
771 const char *CompletionPoint,
bool IsAngled);
/// Returns an optional 32-bit code point; \p StartPtr is taken by reference
/// and so may be advanced by the callee.
/// NOTE(review): presumably reads a numeric universal character name
/// beginning at \p SlashLoc -- confirm.
773 std::optional<uint32_t>
774 tryReadNumericUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
/// Returns an optional 32-bit code point; \p StartPtr is taken by reference
/// and so may be advanced by the callee.
/// NOTE(review): presumably reads a named universal character name beginning
/// at \p SlashLoc -- confirm.
775 std::optional<uint32_t> tryReadNamedUCN(
const char *&StartPtr,
776 const char *SlashLoc, Token *
Result);
/// Returns a 32-bit code point directly (not an optional), unlike the two
/// helpers above.
/// NOTE(review): presumably the common entry point that reads either kind of
/// universal character name; the failure value is not visible here -- confirm.
790 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
803 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
/// \p CurPtr is taken by reference, so the callee may advance it.
/// NOTE(review): presumably consumes one UTF-8 encoded identifier character
/// at \p CurPtr and returns whether it did -- confirm.
812 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr, Token &
Result);
enum clang::sema::@1718::IndirectLocalPathEntry::EntryKind Kind
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::LangOptions interface.
Defines the PreprocessorLexer interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
__device__ __2f16 float c
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
StringRef getBuffer() const
Gets source code buffer.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the newline pointed to by Str is preceded by an escape sequence.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Lexer & operator=(const Lexer &)=delete
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
Lexer(const Lexer &)=delete
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
bool LexingRawMode
True if in raw mode.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
@ Result
The result type of a method or function.
Diagnostic wrappers for TextAPI types for error reporting.
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
unsigned Size
Size of the preamble in bytes.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)