29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
79 case tok::annot_typename:
80 case tok::annot_decltype:
81 case tok::annot_pack_indexing_type:
87 case tok::kw___int128:
89 case tok::kw_unsigned:
97 case tok::kw__Float16:
98 case tok::kw___float128:
99 case tok::kw___ibm128:
100 case tok::kw_wchar_t:
106#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
107#include "clang/Basic/TransformTypeTraits.def"
108 case tok::kw___auto_type:
109 case tok::kw_char16_t:
110 case tok::kw_char32_t:
112 case tok::kw_decltype:
113 case tok::kw_char8_t:
// Out-of-line definition of Lexer::anchor(). The body is empty.
// NOTE(review): presumably the usual out-of-line anchor used to pin the class
// vtable to this translation unit -- confirm against the class declaration,
// which is not visible here.
125void Lexer::anchor() {}
// Initializes the buffer pointers and the per-lexer state flags.
//
// BufStart is stored as BufferStart. BufEnd must point at a 0 byte (asserted
// below). Only part of the body is visible in this excerpt; the comments
// describe the visible statements only.
127void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
128 const char *BufEnd) {
129 BufferStart = BufStart;
// The input buffer is required to be NUL-terminated at BufEnd.
133 assert(BufEnd[0] == 0 &&
134 "We assume that the input buffer has a null character at the end"
135 " to simplify lexing!");
// Only when lexing begins at the very start of the buffer: test whether the
// buffer starts with the 3-byte sequence EF BB BF (the UTF-8 byte-order mark)
// and advance BufferPtr by the resulting BOMLength. The fallback value used
// when there is no match is not visible in this excerpt.
140 if (BufferStart == BufferPtr) {
142 StringRef Buf(BufferStart, BufferEnd - BufferStart);
143 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
144 .StartsWith(
"\xEF\xBB\xBF", 3)
148 BufferPtr += BOMLength;
// Reset state flags: not a pragma lexer, no conflict marker state, positioned
// at the start of a line (logical and physical), and no leading space or
// leading empty macro recorded.
151 Is_PragmaLexer =
false;
152 CurrentConflictMarkerState =
CMK_None;
155 IsAtStartOfLine =
true;
156 IsAtPhysicalStartOfLine =
true;
158 HasLeadingSpace =
false;
159 HasLeadingEmptyMacro =
false;
// ExtendedTokenMode is cleared to 0 and no newline position is recorded yet.
174 ExtendedTokenMode = 0;
176 NewLinePtr =
nullptr;
186 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
188 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
189 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
190 InputFile.getBufferEnd());
199 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
200 bool IsFirstIncludeOfFile)
202 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
203 InitLexer(BufStart, BufPtr, BufEnd);
214 bool IsFirstIncludeOfFile)
215 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
216 FromFile.getBufferStart(), FromFile.getBufferEnd(),
217 IsFirstIncludeOfFile) {}
220 assert(
PP &&
"Cannot reset token mode without a preprocessor");
221 if (LangOpts.TraditionalCPP)
249 FileID SpellingFID =
SM.getFileID(SpellingLoc);
250 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
256 const char *StrData =
SM.getCharacterData(SpellingLoc);
258 L->BufferPtr = StrData;
259 L->BufferEnd = StrData+TokLen;
260 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
264 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
266 ExpansionLocEnd, TokLen);
273 L->Is_PragmaLexer =
true;
278 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
279 this->IsAtStartOfLine = IsAtStartOfLine;
280 assert((BufferStart + Offset) <= BufferEnd);
281 BufferPtr = BufferStart + Offset;
285 typename T::size_type i = 0, e = Str.size();
287 if (Str[i] ==
'\\' || Str[i] == Quote) {
288 Str.insert(Str.begin() + i,
'\\');
291 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
293 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
294 Str[i] != Str[i + 1]) {
300 Str.insert(Str.begin() + i + 1,
'n');
310 std::string
Result = std::string(Str);
311 char Quote = Charify ?
'\'' :
'"';
326 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
329 const char *BufEnd = BufPtr + Tok.
getLength();
333 while (BufPtr < BufEnd) {
335 Spelling[Length++] = CharAndSize.Char;
336 BufPtr += CharAndSize.Size;
338 if (Spelling[Length - 1] ==
'"')
346 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
349 const char *RawEnd = BufEnd;
350 do --RawEnd;
while (*RawEnd !=
'"');
351 size_t RawLength = RawEnd - BufPtr + 1;
354 memcpy(Spelling + Length, BufPtr, RawLength);
362 while (BufPtr < BufEnd) {
364 Spelling[Length++] = CharAndSize.Char;
365 BufPtr += CharAndSize.Size;
369 "NeedsCleaning flag set on token that didn't need cleaning!");
384 std::pair<FileID, unsigned> locInfo =
SM.getDecomposedLoc(loc);
387 bool invalidTemp =
false;
388 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
390 if (invalid) *invalid =
true;
394 const char *tokenBegin = file.data() + locInfo.second;
397 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
398 file.begin(), tokenBegin, file.end());
406 return StringRef(tokenBegin,
length);
410 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
411 return StringRef(buffer.data(), buffer.size());
421 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
423 bool CharDataInvalid =
false;
433 return std::string(TokStart, TokStart + Tok.
getLength());
454 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
456 const char *TokStart =
nullptr;
458 if (Tok.
is(tok::raw_identifier))
463 Buffer = II->getNameStart();
464 return II->getLength();
474 bool CharDataInvalid =
false;
478 if (CharDataInvalid) {
491 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
512 bool IgnoreWhiteSpace) {
522 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
524 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
528 const char *StrData = Buffer.data()+LocInfo.second;
530 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
534 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
535 Buffer.begin(), StrData, Buffer.end());
544 const char *BufStart = Buffer.data();
545 if (Offset >= Buffer.size())
548 const char *LexStart = BufStart + Offset;
549 for (; LexStart != BufStart; --LexStart) {
564 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
565 if (LocInfo.first.isInvalid())
569 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
575 const char *StrData = Buffer.data() + LocInfo.second;
577 if (!LexStart || LexStart == StrData)
582 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
602 }
while (TheTok.
getKind() != tok::eof);
614 if (!
SM.isMacroArgExpansion(
Loc))
619 std::pair<FileID, unsigned> FileLocInfo =
SM.getDecomposedLoc(FileLoc);
620 std::pair<FileID, unsigned> BeginFileLocInfo =
621 SM.getDecomposedLoc(BeginFileLoc);
622 assert(FileLocInfo.first == BeginFileLocInfo.first &&
623 FileLocInfo.second >= BeginFileLocInfo.second);
629enum PreambleDirectiveKind {
644 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
648 bool InPreprocessorDirective =
false;
652 unsigned MaxLineOffset = 0;
654 const char *CurPtr = Buffer.begin();
655 unsigned CurLine = 0;
656 while (CurPtr != Buffer.end()) {
660 if (CurLine == MaxLines)
664 if (CurPtr != Buffer.end())
665 MaxLineOffset = CurPtr - Buffer.begin();
671 if (InPreprocessorDirective) {
673 if (TheTok.
getKind() == tok::eof) {
684 InPreprocessorDirective =
false;
693 if (MaxLineOffset && TokOffset >= MaxLineOffset)
698 if (TheTok.
getKind() == tok::comment) {
706 Token HashTok = TheTok;
707 InPreprocessorDirective =
true;
716 PreambleDirectiveKind PDK
717 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
718 .Case(
"include", PDK_Skipped)
719 .Case(
"__include_macros", PDK_Skipped)
720 .Case(
"define", PDK_Skipped)
721 .Case(
"undef", PDK_Skipped)
722 .Case(
"line", PDK_Skipped)
723 .Case(
"error", PDK_Skipped)
724 .Case(
"pragma", PDK_Skipped)
725 .Case(
"import", PDK_Skipped)
726 .Case(
"include_next", PDK_Skipped)
727 .Case(
"warning", PDK_Skipped)
728 .Case(
"ident", PDK_Skipped)
729 .Case(
"sccs", PDK_Skipped)
730 .Case(
"assert", PDK_Skipped)
731 .Case(
"unassert", PDK_Skipped)
732 .Case(
"if", PDK_Skipped)
733 .Case(
"ifdef", PDK_Skipped)
734 .Case(
"ifndef", PDK_Skipped)
735 .Case(
"elif", PDK_Skipped)
736 .Case(
"elifdef", PDK_Skipped)
737 .Case(
"elifndef", PDK_Skipped)
738 .Case(
"else", PDK_Skipped)
739 .Case(
"endif", PDK_Skipped)
740 .Default(PDK_Unknown);
757 TheTok.
getKind() == tok::raw_identifier &&
759 LangOpts.CPlusPlusModules) {
762 Token ModuleTok = TheTok;
765 }
while (TheTok.
getKind() == tok::comment);
766 if (TheTok.
getKind() != tok::semi) {
781 if (ActiveCommentLoc.
isValid())
782 End = ActiveCommentLoc;
797 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
800 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
803 unsigned PhysOffset = 0;
808 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
818 for (; CharNo; --CharNo) {
820 TokPtr += CharAndSize.Size;
821 PhysOffset += CharAndSize.Size;
828 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
829 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
878 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
884 *MacroBegin = expansionLoc;
906 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
912 *MacroEnd = expansionLoc;
925 if (
Range.isTokenRange()) {
934 std::tie(FID, BeginOffs) =
SM.getDecomposedLoc(
Begin);
939 if (!
SM.isInFileID(End, FID, &EndOffs) ||
949 return SM.getSLocEntry(
SM.getFileID(
Loc))
951 .isExpansionTokenRange();
973 if (
Range.isTokenRange()) {
994 if (
Range.isTokenRange())
1034 std::pair<FileID, unsigned> beginInfo =
SM.getDecomposedLoc(
Range.
getBegin());
1035 if (beginInfo.first.isInvalid()) {
1041 if (!
SM.isInFileID(
Range.
getEnd(), beginInfo.first, &EndOffs) ||
1042 beginInfo.second > EndOffs) {
1048 bool invalidTemp =
false;
1049 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1056 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1062 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1078 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1086 if (
SM.isInFileID(SpellLoc, MacroFID))
1100 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1102 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1103 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1108 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1110 while (
SM.isMacroArgExpansion(
Loc))
1111 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1117 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1123 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(
Loc).getBegin());
1127 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1129 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1130 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1139 if (Str - 1 < BufferStart)
1142 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1143 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1144 if (Str - 2 < BufferStart)
1154 return *Str ==
'\\';
1161 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1162 if (LocInfo.first.isInvalid())
1165 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1171 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1172 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1173 return NumWhitespaceChars == StringRef::npos
1175 : Rest.take_front(NumWhitespaceChars);
1190 unsigned CharNo,
unsigned TokLen) {
1191 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1207 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1213 unsigned TokLen)
const {
1214 assert(
Loc >= BufferStart &&
Loc <= BufferEnd &&
1215 "Location out of range for this buffer!");
1219 unsigned CharNo =
Loc-BufferStart;
1225 assert(
PP &&
"This doesn't work on raw lexers");
1244 case '=':
return '#';
1245 case ')':
return ']';
1246 case '(':
return '[';
1247 case '!':
return '|';
1248 case '\'':
return '^';
1249 case '>':
return '}';
1250 case '/':
return '\\';
1251 case '<':
return '{';
1252 case '-':
return '~';
1267 L->
Diag(CP-2, diag::trigraph_ignored);
1272 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
// Inspects the characters starting at Ptr for a newline sequence.
// The declaration and updates of Size, and every return statement, are not
// visible in this excerpt, so the exact return contract cannot be stated here.
// NOTE(review): a caller elsewhere in this file treats a result of 0 as
// "no escaped newline" -- confirm against the full function.
1279unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
// Branch taken when the character at Ptr[Size-1] is neither LF nor CR.
1284 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
// Detects a two-character newline: the following character is also CR or LF
// but differs from the previous one (that is, CR LF or LF CR).
1288 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1289 Ptr[Size-1] != Ptr[Size])
// Advances P past escaped newlines and returns the resulting pointer.
// Only part of the body is visible in this excerpt (the enclosing loop and
// the first branch are missing); comments describe the visible lines only.
1302const char *Lexer::SkipEscapedNewLines(
const char *
P) {
// Position handed to getEscapedNewLineSize below; its assignments are not
// visible in this excerpt.
1304 const char *AfterEscape;
1307 }
else if (*
P ==
'?') {
// A question mark is only of interest when it is followed by a second
// question mark and then a slash; this tests for anything else.
1309 if (
P[1] !=
'?' ||
P[2] !=
'/')
// Measure the newline following the escape. A size of 0 means there is none,
// and P is returned unchanged; otherwise P moves past the escape and newline.
1318 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1319 if (NewLineSize == 0)
return P;
1320 P = AfterEscape+NewLineSize;
1327 bool IncludeComments) {
1330 return std::nullopt;
1335 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1338 bool InvalidTemp =
false;
1339 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1341 return std::nullopt;
1343 const char *TokenBegin =
File.data() + LocInfo.second;
1346 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1347 TokenBegin,
File.end());
1361 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1363 if (!Tok || Tok->isNot(TKind))
1368 unsigned NumWhitespaceChars = 0;
1369 if (SkipTrailingWhitespaceAndNewLine) {
1370 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1371 unsigned char C = *TokenEnd;
1374 NumWhitespaceChars++;
1378 if (
C ==
'\n' ||
C ==
'\r') {
1381 NumWhitespaceChars++;
1382 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1383 NumWhitespaceChars++;
1408 if (Ptr[0] ==
'\\') {
1414 return {
'\\', Size};
1418 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1424 Diag(Ptr, diag::backslash_newline_space);
1427 Size += EscapedNewLineSize;
1428 Ptr += EscapedNewLineSize;
1431 auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
1432 CharAndSize.Size += Size;
1437 return {
'\\',
Size};
1441 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1445 LangOpts.Trigraphs)) {
1451 if (
C ==
'\\')
goto Slash;
1457 return {*Ptr,
Size + 1u};
1471 if (Ptr[0] ==
'\\') {
1477 return {
'\\',
Size};
1480 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1482 Size += EscapedNewLineSize;
1483 Ptr += EscapedNewLineSize;
1486 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1487 CharAndSize.Size +=
Size;
1492 return {
'\\',
Size};
1496 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1502 if (
C ==
'\\')
goto Slash;
1508 return {*Ptr,
Size + 1u};
// Repositions the lexer at byte Offset from BufferStart and sets both
// start-of-line flags to StartOfLine.
//
// Offset      byte offset from BufferStart; positions past BufferEnd are
//             clamped to BufferEnd.
// StartOfLine value stored into IsAtStartOfLine and IsAtPhysicalStartOfLine.
//
// NOTE(review): BufferStart + Offset is formed before the clamp, so an Offset
// beyond the buffer computes an out-of-range pointer before it is corrected.
1516void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1517 BufferPtr = BufferStart + Offset;
// Clamp to the end of the buffer.
1518 if (BufferPtr > BufferEnd)
1519 BufferPtr = BufferEnd;
// The same flag value is used for the logical and the physical line start.
1523 IsAtStartOfLine = StartOfLine;
1524 IsAtPhysicalStartOfLine = StartOfLine;
1528 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1530 return UnicodeWhitespaceChars.contains(Codepoint);
1535 llvm::raw_svector_ostream CharOS(CharBuf);
1536 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1547 bool IsStart,
bool &IsExtension) {
1548 static const llvm::sys::UnicodeCharSet MathStartChars(
1550 static const llvm::sys::UnicodeCharSet MathContinueChars(
1552 if (MathStartChars.contains(
C) ||
1553 (!IsStart && MathContinueChars.contains(
C))) {
1561 bool &IsExtension) {
1562 if (LangOpts.AsmPreprocessor) {
1564 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1566 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1571 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1573 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1577 }
else if (LangOpts.C11) {
1578 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1580 return C11AllowedIDChars.contains(
C);
1582 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1584 return C99AllowedIDChars.contains(
C);
1589 bool &IsExtension) {
1590 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1591 IsExtension =
false;
1592 if (LangOpts.AsmPreprocessor) {
1595 if (LangOpts.CPlusPlus || LangOpts.C23) {
1596 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1597 if (XIDStartChars.contains(
C))
1605 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1607 return !C11DisallowedInitialIDChars.contains(
C);
1609 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1611 return !C99DisallowedInitialIDChars.contains(
C);
1617 static const llvm::sys::UnicodeCharSet MathStartChars(
1619 static const llvm::sys::UnicodeCharSet MathContinueChars(
1622 (void)MathStartChars;
1623 (void)MathContinueChars;
1624 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1625 "Unexpected mathematical notation codepoint");
1641 CannotAppearInIdentifier = 0,
1642 CannotStartIdentifier
1645 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1647 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1649 if (!C99AllowedIDChars.contains(
C)) {
1652 << CannotAppearInIdentifier;
1653 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1656 << CannotStartIdentifier;
1668 struct HomoglyphPair {
1671 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1673 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1726 std::lower_bound(std::begin(SortedHomoglyphs),
1727 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1728 if (Homoglyph->Character ==
C) {
1729 if (Homoglyph->LooksLike) {
1730 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1751 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1754 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1756 if (!IsFirst || InvalidOnlyAtStart) {
1767bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1769 const char *UCNPtr = CurPtr +
Size;
1770 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1771 if (CodePoint == 0) {
1774 bool IsExtension =
false;
1799 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1800 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1803 while (CurPtr != UCNPtr)
1804 (void)getAndAdvanceChar(CurPtr,
Result);
// Tries to decode one UTF-8 encoded character at CurPtr and, on the visible
// success path, advances CurPtr (taken by reference) past it.
// Only part of the body is visible in this excerpt: the validity checks on the
// decoded code point and all return statements are missing.
1808bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1809 llvm::UTF32 CodePoint;
// getCharAndSize reports the size of the first code unit at CurPtr; CharStart
// is the last byte of that span (CurPtr + size - 1).
// NOTE(review): presumably any extra bytes are escaped newlines preceding the
// actual character -- confirm against getCharAndSize.
1814 unsigned FirstCodeUnitSize;
1815 getCharAndSize(CurPtr, FirstCodeUnitSize);
1816 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1817 const char *UnicodePtr = CharStart;
// Strictly decode one UTF-8 sequence starting at CharStart, bounded by
// BufferEnd. UnicodePtr is passed by address and CodePoint receives the value.
1819 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1820 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1821 &CodePoint, llvm::strictConversion);
// Any result other than conversionOK takes the failure branch (body not
// visible in this excerpt).
1822 if (ConvResult != llvm::conversionOK)
1825 bool IsExtension =
false;
// Consume the first code unit through ConsumeChar, then move CurPtr to the
// position left in UnicodePtr after decoding.
1854 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1855 CurPtr = UnicodePtr;
1859bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1860 const char *CurPtr) {
1861 bool IsExtension =
false;
1876 return LexIdentifierContinue(
Result, CurPtr);
1901 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1907 [[maybe_unused]]
const char *BufferEnd) {
1909 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1910 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1912 constexpr ssize_t BytesPerRegister = 16;
1914 __m128i AsciiIdentifierRangeV =
1917 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1924 if (Consumed == BytesPerRegister)
1930 unsigned char C = *CurPtr;
1936bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1945 unsigned char C = getCharAndSize(CurPtr, Size);
1947 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1952 if (!LangOpts.DollarIdents)
1956 Diag(CurPtr, diag::ext_dollar_in_identifier);
1957 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1960 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1968 const char *IdStart = BufferPtr;
1969 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1970 Result.setRawIdentifierData(IdStart);
1986 if (isCodeCompletionPoint(CurPtr)) {
1988 Result.setKind(tok::code_completion);
1994 assert(*CurPtr == 0 &&
"Completion character must be 0");
1999 if (CurPtr < BufferEnd) {
// Reports whether the text at Start looks like the beginning of a hexadecimal
// literal. The visible return is true exactly when the second decoded
// character C2 is a lower-case or upper-case x.
// How CharAndSize1 and CharAndSize2 are produced, and any check on C1, are not
// visible in this excerpt.
// NOTE(review): presumably C1 must be the digit zero for the prefix to
// count -- confirm against the full function.
2017bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2019 char C1 = CharAndSize1.Char;
2025 char C2 = CharAndSize2.Char;
2026 return (C2 ==
'x' || C2 ==
'X');
2032bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2034 char C = getCharAndSize(CurPtr, Size);
2037 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2039 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2043 C = getCharAndSize(CurPtr, Size);
2047 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2050 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2051 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2055 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2059 bool IsHexFloat =
true;
2060 if (!LangOpts.C99) {
2061 if (!isHexaLiteral(BufferPtr, LangOpts))
2063 else if (!LangOpts.CPlusPlus17 &&
2064 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2068 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2072 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2076 Diag(CurPtr, LangOpts.CPlusPlus
2077 ? diag::warn_cxx11_compat_digit_separator
2078 : diag::warn_c23_compat_digit_separator);
2079 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2080 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2081 return LexNumericConstant(
Result, CurPtr);
2086 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2087 return LexNumericConstant(
Result, CurPtr);
2089 return LexNumericConstant(
Result, CurPtr);
2092 const char *TokStart = BufferPtr;
2093 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2094 Result.setLiteralData(TokStart);
2100const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2101 bool IsStringLiteral) {
2102 assert(LangOpts.CPlusPlus);
2106 char C = getCharAndSize(CurPtr, Size);
2107 bool Consumed =
false;
2110 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2112 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2118 if (!LangOpts.CPlusPlus11) {
2121 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2122 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2133 bool IsUDSuffix =
false;
2136 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2140 const unsigned MaxStandardSuffixLength = 3;
2141 char Buffer[MaxStandardSuffixLength] = {
C };
2142 unsigned Consumed =
Size;
2145 auto [Next, NextSize] =
2149 const StringRef CompleteSuffix(Buffer, Chars);
2155 if (Chars == MaxStandardSuffixLength)
2159 Buffer[Chars++] = Next;
2160 Consumed += NextSize;
2166 Diag(CurPtr, LangOpts.MSVCCompat
2167 ? diag::ext_ms_reserved_user_defined_literal
2168 : diag::ext_reserved_user_defined_literal)
2173 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2178 C = getCharAndSize(CurPtr, Size);
2180 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2181 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2182 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2192bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2194 const char *AfterQuote = CurPtr;
2196 const char *NulCharacter =
nullptr;
2199 (Kind == tok::utf8_string_literal ||
2200 Kind == tok::utf16_string_literal ||
2201 Kind == tok::utf32_string_literal))
2202 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2203 : diag::warn_c99_compat_unicode_literal);
2205 char C = getAndAdvanceChar(CurPtr,
Result);
2210 C = getAndAdvanceChar(CurPtr,
Result);
2212 if (
C ==
'\n' ||
C ==
'\r' ||
2213 (
C == 0 && CurPtr-1 == BufferEnd)) {
2215 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2216 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2221 if (isCodeCompletionPoint(CurPtr-1)) {
2223 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2226 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2231 NulCharacter = CurPtr-1;
2233 C = getAndAdvanceChar(CurPtr,
Result);
2237 if (LangOpts.CPlusPlus)
2238 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2242 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2245 const char *TokStart = BufferPtr;
2246 FormTokenWithChars(
Result, CurPtr, Kind);
2247 Result.setLiteralData(TokStart);
2253bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2261 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2263 unsigned PrefixLen = 0;
2267 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2268 const char *Pos = &CurPtr[PrefixLen];
2269 Diag(Pos, LangOpts.CPlusPlus26
2270 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2271 : diag::ext_cxx26_raw_string_literal_character_set)
2272 << StringRef(Pos, 1);
2278 if (CurPtr[PrefixLen] !=
'(') {
2280 const char *PrefixEnd = &CurPtr[PrefixLen];
2281 if (PrefixLen == 16) {
2282 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2283 }
else if (*PrefixEnd ==
'\n') {
2284 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2286 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2287 << StringRef(PrefixEnd, 1);
2299 if (
C == 0 && CurPtr-1 == BufferEnd) {
2305 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2310 const char *Prefix = CurPtr;
2311 CurPtr += PrefixLen + 1;
2318 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2319 CurPtr += PrefixLen + 1;
2322 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2324 Diag(BufferPtr, diag::err_unterminated_raw_string)
2325 << StringRef(Prefix, PrefixLen);
2326 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2332 if (LangOpts.CPlusPlus)
2333 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2336 const char *TokStart = BufferPtr;
2337 FormTokenWithChars(
Result, CurPtr, Kind);
2338 Result.setLiteralData(TokStart);
// Lexes an angle-bracketed header name into Result, starting from CurPtr
// (saved as AfterLessPos). On the visible success path a tok::header_name
// token is formed. Only part of the body is visible in this excerpt (loop
// headers, some conditions and the returns are missing); comments describe
// the visible statements only.
2344bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
// Position of an embedded NUL byte, if one is seen, for the diagnostic below.
2346 const char *NulCharacter =
nullptr;
2347 const char *AfterLessPos = CurPtr;
2348 char C = getAndAdvanceChar(CurPtr,
Result);
2353 C = getAndAdvanceChar(CurPtr,
Result);
2356 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
// Give up on a header name: form only a tok::less token ending at
// AfterLessPos.
2359 FormTokenWithChars(
Result, AfterLessPos, tok::less);
// Code-completion point inside the name: pass the partial path to
// codeCompleteIncludedFile with the angled flag set, then form a tok::unknown
// token ending at that point.
2364 if (isCodeCompletionPoint(CurPtr - 1)) {
2365 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2367 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
// Record where the NUL byte was seen.
2370 NulCharacter = CurPtr-1;
2372 C = getAndAdvanceChar(CurPtr,
Result);
// Diagnose the recorded NUL position (the guarding condition is not visible
// in this excerpt).
2377 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
// Form the header-name token up to CurPtr and attach the token start
// (BufferPtr before the token was formed) as its literal data.
2380 const char *TokStart = BufferPtr;
2381 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2382 Result.setLiteralData(TokStart);
// Handles code completion inside an included-file name.
//
// PathStart       first byte of the path text.
// CompletionPoint position of the completion point within the buffer.
// IsAngled        (parameter line not visible here) selects the closing
//                 delimiter tested in the scan below.
//
// Only part of the body is visible in this excerpt; comments describe the
// visible statements only.
2386void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2387 const char *CompletionPoint,
// The bytes from PathStart up to, but not including, CompletionPoint.
2390 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
// Path separators: forward slash always, plus backslash under MSVCCompat.
2391 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
// Split at the last separator: the text before it (empty when there is no
// separator) and the filename part that starts just after it (or at PathStart
// when there is no separator).
2392 auto Slash = PartialPath.find_last_of(SlashChars);
2394 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2395 const char *StartOfFilename =
2396 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2399 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
// Scan forward from the completion point, examining the character after it.
// Three cases are tested: NUL, CR or LF; the closing delimiter (greater-than
// when IsAngled, otherwise a double quote); and a path separator. The action
// taken in each case is not visible in this excerpt.
2402 while (CompletionPoint < BufferEnd) {
2403 char Next = *(CompletionPoint + 1);
2404 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2407 if (Next == (IsAngled ?
'>' :
'"'))
2409 if (SlashChars.contains(Next))
2421bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2424 const char *NulCharacter =
nullptr;
2427 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2428 Diag(BufferPtr, LangOpts.CPlusPlus
2429 ? diag::warn_cxx98_compat_unicode_literal
2430 : diag::warn_c99_compat_unicode_literal);
2431 else if (Kind == tok::utf8_char_constant)
2432 Diag(BufferPtr, LangOpts.CPlusPlus
2433 ? diag::warn_cxx14_compat_u8_character_literal
2434 : diag::warn_c17_compat_u8_character_literal);
2437 char C = getAndAdvanceChar(CurPtr,
Result);
2440 Diag(BufferPtr, diag::ext_empty_character);
2441 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2448 C = getAndAdvanceChar(CurPtr,
Result);
2450 if (
C ==
'\n' ||
C ==
'\r' ||
2451 (
C == 0 && CurPtr-1 == BufferEnd)) {
2453 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2454 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2459 if (isCodeCompletionPoint(CurPtr-1)) {
2461 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2466 NulCharacter = CurPtr-1;
2468 C = getAndAdvanceChar(CurPtr,
Result);
2472 if (LangOpts.CPlusPlus)
2473 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2477 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2480 const char *TokStart = BufferPtr;
2481 FormTokenWithChars(
Result, CurPtr, Kind);
2482 Result.setLiteralData(TokStart);
2490bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2491 bool &TokAtPhysicalStartOfLine) {
2495 unsigned char Char = *CurPtr;
2497 const char *lastNewLine =
nullptr;
2498 auto setLastNewLine = [&](
const char *Ptr) {
2504 setLastNewLine(CurPtr - 1);
2523 if (*CurPtr ==
'\n')
2524 setLastNewLine(CurPtr);
2531 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2533 IsAtStartOfLine =
true;
2534 IsAtPhysicalStartOfLine =
true;
2541 char PrevChar = CurPtr[-1];
2547 TokAtPhysicalStartOfLine =
true;
2549 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2566bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2567 bool &TokAtPhysicalStartOfLine) {
2572 Diag(BufferPtr, diag::ext_line_comment);
2590 bool UnicodeDecodingAlreadyDiagnosed =
false;
2597 C !=
'\n' &&
C !=
'\r') {
2599 UnicodeDecodingAlreadyDiagnosed =
false;
2603 unsigned Length = llvm::getUTF8SequenceSize(
2604 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2607 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2608 UnicodeDecodingAlreadyDiagnosed =
true;
2611 UnicodeDecodingAlreadyDiagnosed =
false;
2617 const char *NextLine = CurPtr;
2620 const char *EscapePtr = CurPtr-1;
2621 bool HasSpace =
false;
2627 if (*EscapePtr ==
'\\')
2630 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2631 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2633 CurPtr = EscapePtr-2;
2639 Diag(EscapePtr, diag::backslash_newline_space);
2646 const char *OldPtr = CurPtr;
2649 C = getAndAdvanceChar(CurPtr,
Result);
2654 if (
C != 0 && CurPtr == OldPtr+1) {
2662 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2663 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2664 for (; OldPtr != CurPtr; ++OldPtr)
2665 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2669 const char *ForwardPtr = CurPtr;
2672 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2677 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2682 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2687 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2705 return SaveLineComment(
Result, CurPtr);
2719 NewLinePtr = CurPtr++;
2723 TokAtPhysicalStartOfLine =
true;
// Turns the line comment ending at CurPtr into a tok::comment token in Result.
// Only part of the body is visible in this excerpt; comments describe the
// visible statements only.
2732bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
// Form a tok::comment token ending at CurPtr.
2735 FormTokenWithChars(
Result, CurPtr, tok::comment);
// Sanity check: the spelling must begin with two slash characters. Spelling is
// obtained on lines not visible in this excerpt.
2747 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
// Set the token kind to tok::comment.
// NOTE(review): presumably the token is rebuilt on the missing lines in
// between, which is why the kind is set a second time -- confirm.
2751 Result.setKind(tok::comment);
2762 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2765 const char *TrigraphPos =
nullptr;
2767 const char *SpacePos =
nullptr;
2774 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2776 if (CurPtr[0] == CurPtr[1])
2790 if (*CurPtr ==
'\\') {
2792 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2794 TrigraphPos = CurPtr - 2;
2805 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2814 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2818 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2823 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2827 L->
Diag(SpacePos, diag::backslash_newline_space);
2833#include <emmintrin.h>
2848bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2849 bool &TokAtPhysicalStartOfLine) {
2859 unsigned char C = getCharAndSize(CurPtr, CharSize);
2861 if (
C == 0 && CurPtr == BufferEnd+1) {
2863 Diag(BufferPtr, diag::err_unterminated_block_comment);
2869 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2886 bool UnicodeDecodingAlreadyDiagnosed =
false;
2891 if (CurPtr + 24 < BufferEnd &&
2896 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2901 if (
C ==
'/')
goto FoundSlash;
2905 while (CurPtr + 16 < BufferEnd) {
2907 if (LLVM_UNLIKELY(Mask != 0)) {
2917 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2923 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2924 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2925 0x80, 0x80, 0x80, 0x80};
2926 __vector
unsigned char Slashes = {
2927 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2928 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2930 while (CurPtr + 16 < BufferEnd) {
2932 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2934 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2941 while (CurPtr + 16 < BufferEnd) {
2942 bool HasNonASCII =
false;
2943 for (
unsigned I = 0; I < 16; ++I)
2944 HasNonASCII |= !
isASCII(CurPtr[I]);
2946 if (LLVM_UNLIKELY(HasNonASCII))
2949 bool HasSlash =
false;
2950 for (
unsigned I = 0; I < 16; ++I)
2951 HasSlash |= CurPtr[I] ==
'/';
2965 while (
C !=
'/' &&
C !=
'\0') {
2967 UnicodeDecodingAlreadyDiagnosed =
false;
2974 unsigned Length = llvm::getUTF8SequenceSize(
2975 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2978 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2979 UnicodeDecodingAlreadyDiagnosed =
true;
2981 UnicodeDecodingAlreadyDiagnosed =
false;
2982 CurPtr += Length - 1;
2989 if (CurPtr[-2] ==
'*')
2992 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2994 LangOpts.Trigraphs)) {
3000 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3005 Diag(CurPtr-1, diag::warn_nested_block_comment);
3007 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3009 Diag(BufferPtr, diag::err_unterminated_block_comment);
3018 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3024 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3043 FormTokenWithChars(
Result, CurPtr, tok::comment);
3052 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3070 "Must be in a preprocessing directive!");
3075 const char *CurPtr = BufferPtr;
3077 char Char = getAndAdvanceChar(CurPtr, Tmp);
3085 if (CurPtr-1 != BufferEnd) {
3086 if (isCodeCompletionPoint(CurPtr-1)) {
3102 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3103 BufferPtr = CurPtr-1;
3107 if (Tmp.
is(tok::code_completion)) {
3112 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3124bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3132 FormTokenWithChars(
Result, CurPtr, tok::eod);
3144 BufferPtr = BufferEnd;
3145 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3165 diag::err_pp_unterminated_conditional);
3171 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
3176 if (LangOpts.CPlusPlus11) {
3180 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
3181 DiagID = diag::warn_cxx98_compat_no_newline_eof;
3183 DiagID = diag::warn_no_newline_eof;
3186 DiagID = diag::ext_no_newline_eof;
3189 Diag(BufferEnd, DiagID)
3203unsigned Lexer::isNextPPTokenLParen() {
3204 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3206 if (isDependencyDirectivesLexer()) {
3207 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3209 return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
3219 const char *TmpBufferPtr = BufferPtr;
3221 bool atStartOfLine = IsAtStartOfLine;
3222 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3223 bool leadingSpace = HasLeadingSpace;
3229 BufferPtr = TmpBufferPtr;
3231 HasLeadingSpace = leadingSpace;
3232 IsAtStartOfLine = atStartOfLine;
3233 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3238 if (Tok.
is(tok::eof))
3240 return Tok.
is(tok::l_paren);
3246 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3248 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3249 size_t Pos = RestOfBuffer.find(Terminator);
3250 while (Pos != StringRef::npos) {
3253 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3254 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3255 Pos = RestOfBuffer.find(Terminator);
3258 return RestOfBuffer.data()+Pos;
3267bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3269 if (CurPtr != BufferStart &&
3270 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3274 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3275 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3290 Diag(CurPtr, diag::err_conflict_marker);
3291 CurrentConflictMarkerState =
Kind;
3295 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3296 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3311bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3313 if (CurPtr != BufferStart &&
3314 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3323 for (
unsigned i = 1; i != 4; ++i)
3324 if (CurPtr[i] != CurPtr[0])
3331 CurrentConflictMarkerState)) {
3335 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3341 CurrentConflictMarkerState =
CMK_None;
3349 const char *BufferEnd) {
3350 if (CurPtr == BufferEnd)
3353 for (; CurPtr != BufferEnd; ++CurPtr) {
3354 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3360bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3361 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3367 const char *Start = CurPtr - 1;
3368 if (!LangOpts.AllowEditorPlaceholders)
3369 Diag(Start, diag::err_placeholder_in_source);
3371 FormTokenWithChars(
Result, End, tok::raw_identifier);
3372 Result.setRawIdentifierData(Start);
3379bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3388std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3389 const char *SlashLoc,
3392 char Kind = getCharAndSize(StartPtr, CharSize);
3393 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3395 unsigned NumHexDigits;
3398 else if (Kind ==
'U')
3401 bool Delimited =
false;
3402 bool FoundEndDelimiter =
false;
3406 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3408 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3409 return std::nullopt;
3412 const char *CurPtr = StartPtr + CharSize;
3413 const char *KindLoc = &CurPtr[-1];
3416 while (Count != NumHexDigits || Delimited) {
3417 char C = getCharAndSize(CurPtr, CharSize);
3418 if (!Delimited && Count == 0 &&
C ==
'{') {
3424 if (Delimited &&
C ==
'}') {
3426 FoundEndDelimiter =
true;
3430 unsigned Value = llvm::hexDigitValue(
C);
3435 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3436 << StringRef(KindLoc, 1);
3437 return std::nullopt;
3440 if (CodePoint & 0xF000'0000) {
3442 Diag(KindLoc, diag::err_escape_too_large) << 0;
3443 return std::nullopt;
3454 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3455 : diag::warn_ucn_escape_no_digits)
3456 << StringRef(KindLoc, 1);
3457 return std::nullopt;
3460 if (Delimited && Kind ==
'U') {
3462 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3463 return std::nullopt;
3466 if (!Delimited && Count != NumHexDigits) {
3468 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3470 if (Count == 4 && NumHexDigits == 8) {
3472 Diag(KindLoc, diag::note_ucn_four_not_eight)
3476 return std::nullopt;
3479 if (Delimited &&
PP) {
3481 ? diag::warn_cxx23_delimited_escape_sequence
3482 : diag::ext_delimited_escape_sequence)
3491 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3494 while (StartPtr != CurPtr)
3495 (void)getAndAdvanceChar(StartPtr, *
Result);
3502std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3503 const char *SlashLoc,
3508 char C = getCharAndSize(StartPtr, CharSize);
3509 assert(
C ==
'N' &&
"expected \\N{...}");
3511 const char *CurPtr = StartPtr + CharSize;
3512 const char *KindLoc = &CurPtr[-1];
3514 C = getCharAndSize(CurPtr, CharSize);
3517 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3518 return std::nullopt;
3521 const char *StartName = CurPtr;
3522 bool FoundEndDelimiter =
false;
3525 C = getCharAndSize(CurPtr, CharSize);
3528 FoundEndDelimiter =
true;
3534 Buffer.push_back(
C);
3537 if (!FoundEndDelimiter || Buffer.empty()) {
3539 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3540 : diag::warn_delimited_ucn_incomplete)
3541 << StringRef(KindLoc, 1);
3542 return std::nullopt;
3545 StringRef Name(Buffer.data(), Buffer.size());
3546 std::optional<char32_t> Match =
3547 llvm::sys::unicode::nameToCodepointStrict(Name);
3548 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3550 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3552 Diag(StartName, diag::err_invalid_ucn_name)
3553 << StringRef(Buffer.data(), Buffer.size())
3556 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3567 if (Diagnose && Match)
3569 ? diag::warn_cxx23_delimited_escape_sequence
3570 : diag::ext_delimited_escape_sequence)
3577 if (LooseMatch && Diagnose)
3578 Match = LooseMatch->CodePoint;
3585 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3588 while (StartPtr != CurPtr)
3589 (void)getAndAdvanceChar(StartPtr, *
Result);
3593 return Match ? std::optional<uint32_t>(*Match) :
std::nullopt;
3596uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3600 std::optional<uint32_t> CodePointOpt;
3601 char Kind = getCharAndSize(StartPtr, CharSize);
3602 if (Kind ==
'u' || Kind ==
'U')
3603 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3604 else if (Kind ==
'N')
3605 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3610 uint32_t CodePoint = *CodePointOpt;
3613 if (LangOpts.AsmPreprocessor)
3632 if (CodePoint < 0xA0) {
3636 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3637 Diag(BufferPtr, diag::err_ucn_control_character);
3639 char C =
static_cast<char>(CodePoint);
3640 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3645 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3650 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3651 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3653 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3661bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3662 const char *CurPtr) {
3665 Diag(BufferPtr, diag::ext_unicode_whitespace)
3674void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3675 IsAtStartOfLine =
Result.isAtStartOfLine();
3676 HasLeadingSpace =
Result.hasLeadingSpace();
3677 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3682 assert(!isDependencyDirectivesLexer());
3688 if (IsAtStartOfLine) {
3690 IsAtStartOfLine =
false;
3693 if (HasLeadingSpace) {
3695 HasLeadingSpace =
false;
3698 if (HasLeadingEmptyMacro) {
3700 HasLeadingEmptyMacro =
false;
3703 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3704 IsAtPhysicalStartOfLine =
false;
3707 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3709 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3710 return returnedToken;
3718bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3720 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3721 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3724 const char *CurPtr = BufferPtr;
3736 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3745 unsigned SizeTmp, SizeTmp2;
3748 char Char = getAndAdvanceChar(CurPtr,
Result);
3752 NewLinePtr =
nullptr;
3757 if (CurPtr-1 == BufferEnd)
3758 return LexEndOfFile(
Result, CurPtr-1);
3761 if (isCodeCompletionPoint(CurPtr-1)) {
3764 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3769 Diag(CurPtr-1, diag::null_in_file);
3771 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3780 if (LangOpts.MicrosoftExt) {
3782 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3783 return LexEndOfFile(
Result, CurPtr-1);
3787 Kind = tok::unknown;
3791 if (CurPtr[0] ==
'\n')
3792 (void)getAndAdvanceChar(CurPtr,
Result);
3806 IsAtStartOfLine =
true;
3807 IsAtPhysicalStartOfLine =
true;
3808 NewLinePtr = CurPtr - 1;
3817 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3827 SkipHorizontalWhitespace:
3829 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3838 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3839 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3841 goto SkipIgnoredUnits;
3843 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3845 goto SkipIgnoredUnits;
3847 goto SkipHorizontalWhitespace;
3855 case '0':
case '1':
case '2':
case '3':
case '4':
3856 case '5':
case '6':
case '7':
case '8':
case '9':
3859 return LexNumericConstant(
Result, CurPtr);
3868 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3869 Char = getCharAndSize(CurPtr, SizeTmp);
3873 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3874 tok::utf16_string_literal);
3878 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3879 tok::utf16_char_constant);
3882 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3883 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3884 return LexRawStringLiteral(
Result,
3885 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3887 tok::utf16_string_literal);
3890 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3894 return LexStringLiteral(
Result,
3895 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3897 tok::utf8_string_literal);
3898 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3899 return LexCharConstant(
3900 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3902 tok::utf8_char_constant);
3904 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3906 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3909 return LexRawStringLiteral(
Result,
3910 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3913 tok::utf8_string_literal);
3920 return LexIdentifierContinue(
Result, CurPtr);
3926 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3927 Char = getCharAndSize(CurPtr, SizeTmp);
3931 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3932 tok::utf32_string_literal);
3936 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3937 tok::utf32_char_constant);
3940 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3941 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3942 return LexRawStringLiteral(
Result,
3943 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3945 tok::utf32_string_literal);
3949 return LexIdentifierContinue(
Result, CurPtr);
3955 if (LangOpts.RawStringLiterals) {
3956 Char = getCharAndSize(CurPtr, SizeTmp);
3959 return LexRawStringLiteral(
Result,
3960 ConsumeChar(CurPtr, SizeTmp,
Result),
3961 tok::string_literal);
3965 return LexIdentifierContinue(
Result, CurPtr);
3970 Char = getCharAndSize(CurPtr, SizeTmp);
3974 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3975 tok::wide_string_literal);
3978 if (LangOpts.RawStringLiterals && Char ==
'R' &&
3979 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3980 return LexRawStringLiteral(
Result,
3981 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3983 tok::wide_string_literal);
3987 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3988 tok::wide_char_constant);
3993 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3994 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3995 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3996 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3997 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3998 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3999 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4000 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4004 return LexIdentifierContinue(
Result, CurPtr);
4007 if (LangOpts.DollarIdents) {
4009 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4012 return LexIdentifierContinue(
Result, CurPtr);
4015 Kind = tok::unknown;
4022 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4028 return LexStringLiteral(
Result, CurPtr,
4030 : tok::string_literal);
4034 Kind = tok::question;
4037 Kind = tok::l_square;
4040 Kind = tok::r_square;
4043 Kind = tok::l_paren;
4046 Kind = tok::r_paren;
4049 Kind = tok::l_brace;
4052 Kind = tok::r_brace;
4055 Char = getCharAndSize(CurPtr, SizeTmp);
4056 if (Char >=
'0' && Char <=
'9') {
4060 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4061 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4062 Kind = tok::periodstar;
4064 }
else if (Char ==
'.' &&
4065 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4066 Kind = tok::ellipsis;
4067 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4074 Char = getCharAndSize(CurPtr, SizeTmp);
4077 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4078 }
else if (Char ==
'=') {
4079 Kind = tok::ampequal;
4080 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4086 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4087 Kind = tok::starequal;
4088 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4094 Char = getCharAndSize(CurPtr, SizeTmp);
4096 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4097 Kind = tok::plusplus;
4098 }
else if (Char ==
'=') {
4099 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4100 Kind = tok::plusequal;
4106 Char = getCharAndSize(CurPtr, SizeTmp);
4108 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4109 Kind = tok::minusminus;
4110 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4111 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4112 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4114 Kind = tok::arrowstar;
4115 }
else if (Char ==
'>') {
4116 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4118 }
else if (Char ==
'=') {
4119 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4120 Kind = tok::minusequal;
4129 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4130 Kind = tok::exclaimequal;
4131 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4133 Kind = tok::exclaim;
4138 Char = getCharAndSize(CurPtr, SizeTmp);
4148 bool TreatAsComment =
4149 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4150 if (!TreatAsComment)
4152 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4154 if (TreatAsComment) {
4155 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4156 TokAtPhysicalStartOfLine))
4162 goto SkipIgnoredUnits;
4167 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4168 TokAtPhysicalStartOfLine))
4177 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4178 Kind = tok::slashequal;
4184 Char = getCharAndSize(CurPtr, SizeTmp);
4186 Kind = tok::percentequal;
4187 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4188 }
else if (LangOpts.Digraphs && Char ==
'>') {
4189 Kind = tok::r_brace;
4190 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4191 }
else if (LangOpts.Digraphs && Char ==
':') {
4192 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4193 Char = getCharAndSize(CurPtr, SizeTmp);
4194 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4195 Kind = tok::hashhash;
4196 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4198 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4199 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4201 Diag(BufferPtr, diag::ext_charize_microsoft);
4208 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4209 goto HandleDirective;
4214 Kind = tok::percent;
4218 Char = getCharAndSize(CurPtr, SizeTmp);
4220 return LexAngledStringLiteral(
Result, CurPtr);
4221 }
else if (Char ==
'<') {
4222 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4224 Kind = tok::lesslessequal;
4225 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4227 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4231 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4235 }
else if (LangOpts.CUDA && After ==
'<') {
4236 Kind = tok::lesslessless;
4237 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4240 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4241 Kind = tok::lessless;
4243 }
else if (Char ==
'=') {
4244 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4246 if (LangOpts.CPlusPlus20) {
4248 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4249 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4251 Kind = tok::spaceship;
4257 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4262 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4263 Kind = tok::lessequal;
4264 }
else if (LangOpts.Digraphs && Char ==
':') {
4265 if (LangOpts.CPlusPlus11 &&
4266 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4273 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4274 if (After !=
':' && After !=
'>') {
4277 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4282 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4283 Kind = tok::l_square;
4284 }
else if (LangOpts.Digraphs && Char ==
'%') {
4285 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4286 Kind = tok::l_brace;
4287 }
else if (Char ==
'#' && SizeTmp == 1 &&
4288 lexEditorPlaceholder(
Result, CurPtr)) {
4295 Char = getCharAndSize(CurPtr, SizeTmp);
4297 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4298 Kind = tok::greaterequal;
4299 }
else if (Char ==
'>') {
4300 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4302 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4304 Kind = tok::greatergreaterequal;
4305 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4309 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4312 }
else if (LangOpts.CUDA && After ==
'>') {
4313 Kind = tok::greatergreatergreater;
4314 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4317 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4318 Kind = tok::greatergreater;
4321 Kind = tok::greater;
4325 Char = getCharAndSize(CurPtr, SizeTmp);
4327 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4328 Kind = tok::caretequal;
4330 if (LangOpts.OpenCL && Char ==
'^')
4331 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4336 Char = getCharAndSize(CurPtr, SizeTmp);
4338 Kind = tok::pipeequal;
4339 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4340 }
else if (Char ==
'|') {
4342 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4344 Kind = tok::pipepipe;
4345 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4351 Char = getCharAndSize(CurPtr, SizeTmp);
4352 if (LangOpts.Digraphs && Char ==
'>') {
4353 Kind = tok::r_square;
4354 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4355 }
else if (Char ==
':') {
4356 Kind = tok::coloncolon;
4357 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4366 Char = getCharAndSize(CurPtr, SizeTmp);
4369 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4372 Kind = tok::equalequal;
4373 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4382 Char = getCharAndSize(CurPtr, SizeTmp);
4384 Kind = tok::hashhash;
4385 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4386 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4389 Diag(BufferPtr, diag::ext_charize_microsoft);
4390 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4396 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4397 goto HandleDirective;
4405 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4408 Kind = tok::unknown;
4413 if (!LangOpts.AsmPreprocessor) {
4414 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4415 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4416 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4424 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4428 Kind = tok::unknown;
4433 Kind = tok::unknown;
4437 llvm::UTF32 CodePoint;
4442 llvm::ConversionResult Status =
4443 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4444 (
const llvm::UTF8 *)BufferEnd,
4446 llvm::strictConversion);
4447 if (Status == llvm::conversionOK) {
4448 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4449 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4456 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4462 Kind = tok::unknown;
4469 Diag(CurPtr, diag::err_invalid_utf8);
4471 BufferPtr = CurPtr+1;
4483 FormTokenWithChars(
Result, CurPtr, Kind);
4489 FormTokenWithChars(
Result, CurPtr, tok::hash);
4504const char *Lexer::convertDependencyDirectiveToken(
4506 const char *TokPtr = BufferStart + DDTok.
Offset;
4512 BufferPtr = TokPtr + DDTok.
Length;
4516bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4517 assert(isDependencyDirectivesLexer());
4519 using namespace dependency_directives_scan;
4521 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4522 if (DepDirectives.front().Kind == pp_eof)
4523 return LexEndOfFile(
Result, BufferEnd);
4524 if (DepDirectives.front().Kind == tokens_present_before_eof)
4526 NextDepDirectiveTokenIndex = 0;
4527 DepDirectives = DepDirectives.drop_front();
4531 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4532 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4538 BufferPtr = BufferStart + DDTok.
Offset;
4539 LexAngledStringLiteral(
Result, BufferPtr + 1);
4540 if (
Result.isNot(tok::header_name))
4545 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4546 if (BufferStart + NextTok.
Offset >= BufferPtr)
4548 ++NextDepDirectiveTokenIndex;
4553 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4555 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4559 if (
Result.is(tok::raw_identifier)) {
4560 Result.setRawIdentifierData(TokPtr);
4568 if (
Result.isLiteral()) {
4569 Result.setLiteralData(TokPtr);
4572 if (
Result.is(tok::colon)) {
4574 if (*BufferPtr ==
':') {
4575 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4577 ++NextDepDirectiveTokenIndex;
4578 Result.setKind(tok::coloncolon);
4588bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4589 assert(isDependencyDirectivesLexer());
4591 using namespace dependency_directives_scan;
4594 unsigned NestedIfs = 0;
4596 DepDirectives = DepDirectives.drop_front();
4597 switch (DepDirectives.front().Kind) {
4599 llvm_unreachable(
"unexpected 'pp_none'");
4640 NextDepDirectiveTokenIndex = 0;
4641 return LexEndOfFile(
Result, BufferEnd);
4646 DepDirectives.front().Tokens.front();
4647 assert(DDTok.
is(tok::hash));
4648 NextDepDirectiveTokenIndex = 1;
4650 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns a pointer to the beginning of the line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which control the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
void setBegin(SourceLocation b)
SourceLocation getEnd() const
SourceLocation getBegin() const
void setEnd(SourceLocation e)
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
const FunctionProtoType * T
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const