13#include "llvm/ADT/StringExtras.h"
14#include "llvm/ADT/StringSwitch.h"
15#include "llvm/Support/ConvertUTF.h"
16#include "llvm/Support/ErrorHandling.h"
22 llvm::errs() <<
"comments::Token Kind=" << Kind <<
" ";
24 llvm::errs() <<
" " << Length <<
" \"" << L.
getSpelling(*
this,
SM) <<
"\"\n";
40 llvm::BumpPtrAllocator &Allocator,
42 char *Resolved = Allocator.Allocate<
char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
43 char *ResolvedPtr = Resolved;
44 if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
45 return StringRef(Resolved, ResolvedPtr - Resolved);
52#include "clang/AST/CommentHTMLTags.inc"
53#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
57StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name)
const {
59 return llvm::StringSwitch<StringRef>(Name)
66 .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
69StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name)
const {
70 unsigned CodePoint = 0;
71 for (
unsigned i = 0, e = Name.size(); i != e; ++i) {
74 CodePoint += Name[i] -
'0';
79StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name)
const {
80 unsigned CodePoint = 0;
81 for (
unsigned i = 0, e = Name.size(); i != e; ++i) {
83 const char C = Name[i];
85 CodePoint += llvm::hexDigitValue(
C);
90void Lexer::skipLineStartingDecorations() {
92 assert(CommentState == LCS_InsideCComment);
94 if (BufferPtr == CommentEnd)
97 const char *NewBufferPtr = BufferPtr;
99 if (++NewBufferPtr == CommentEnd)
101 if (*NewBufferPtr ==
'*')
102 BufferPtr = NewBufferPtr + 1;
107const char *findNewline(
const char *BufferPtr,
const char *BufferEnd) {
108 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
115const char *
skipNewline(
const char *BufferPtr,
const char *BufferEnd) {
116 if (BufferPtr == BufferEnd)
119 if (*BufferPtr ==
'\n')
122 assert(*BufferPtr ==
'\r');
124 if (BufferPtr != BufferEnd && *BufferPtr ==
'\n')
130const char *skipNamedCharacterReference(
const char *BufferPtr,
131 const char *BufferEnd) {
132 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
139const char *skipDecimalCharacterReference(
const char *BufferPtr,
140 const char *BufferEnd) {
141 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
148const char *skipHexCharacterReference(
const char *BufferPtr,
149 const char *BufferEnd) {
150 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
157bool isHTMLIdentifierStartingCharacter(
char C) {
161bool isHTMLIdentifierCharacter(
char C) {
165const char *skipHTMLIdentifier(
const char *BufferPtr,
const char *BufferEnd) {
166 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
167 if (!isHTMLIdentifierCharacter(*BufferPtr))
177const char *skipHTMLQuotedString(
const char *BufferPtr,
const char *BufferEnd)
179 const char Quote = *BufferPtr;
180 assert(Quote ==
'\"' || Quote ==
'\'');
183 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
184 const char C = *BufferPtr;
185 if (
C == Quote && BufferPtr[-1] !=
'\\')
191const char *
skipWhitespace(
const char *BufferPtr,
const char *BufferEnd) {
192 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
199bool isWhitespace(
const char *BufferPtr,
const char *BufferEnd) {
203bool isCommandNameStartCharacter(
char C) {
207bool isCommandNameCharacter(
char C) {
211const char *skipCommandName(
const char *BufferPtr,
const char *BufferEnd) {
212 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
213 if (!isCommandNameCharacter(*BufferPtr))
221const char *findBCPLCommentEnd(
const char *BufferPtr,
const char *BufferEnd) {
222 const char *CurPtr = BufferPtr;
223 while (CurPtr != BufferEnd) {
226 if (CurPtr == BufferEnd)
230 const char *EscapePtr = CurPtr - 1;
234 if (*EscapePtr ==
'\\' ||
235 (EscapePtr - 2 >= BufferPtr && EscapePtr[0] ==
'/' &&
236 EscapePtr[-1] ==
'?' && EscapePtr[-2] ==
'?')) {
247const char *findCCommentEnd(
const char *BufferPtr,
const char *BufferEnd) {
248 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
249 if (*BufferPtr ==
'*') {
250 assert(BufferPtr + 1 != BufferEnd);
251 if (*(BufferPtr + 1) ==
'/')
255 llvm_unreachable(
"buffer end hit before '*/' was seen");
260void Lexer::formTokenWithChars(Token &
Result,
const char *TokEnd,
262 const unsigned TokLen = TokEnd - BufferPtr;
263 Result.setLocation(getSourceLocation(BufferPtr));
267 Result.TextPtr =
"<UNSET>";
273const char *Lexer::skipTextToken() {
274 const char *TokenPtr = BufferPtr;
275 assert(TokenPtr < CommentEnd);
276 StringRef TokStartSymbols = ParseCommands ?
"\n\r\\@\"&<" :
"\n\r";
280 StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(TokStartSymbols);
281 if (End == StringRef::npos)
286 if (*(TokenPtr + End) ==
'\"') {
288 End = StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(
"\n\r\"");
289 if (End != StringRef::npos && *(TokenPtr + End) ==
'\"')
293 return TokenPtr + End;
296void Lexer::lexCommentText(Token &
T) {
297 assert(CommentState == LCS_InsideBCPLComment ||
298 CommentState == LCS_InsideCComment);
301 auto HandleNonCommandToken = [&]() ->
void {
302 assert(State == LS_Normal);
304 const char *TokenPtr = BufferPtr;
305 assert(TokenPtr < CommentEnd);
312 if (CommentState == LCS_InsideCComment)
313 skipLineStartingDecorations();
317 return formTextToken(
T, skipTextToken());
322 return HandleNonCommandToken();
327 case LS_VerbatimBlockFirstLine:
328 lexVerbatimBlockFirstLine(
T);
330 case LS_VerbatimBlockBody:
331 lexVerbatimBlockBody(
T);
333 case LS_VerbatimLineText:
334 lexVerbatimLineText(
T);
336 case LS_HTMLStartTag:
344 assert(State == LS_Normal);
345 const char *TokenPtr = BufferPtr;
346 assert(TokenPtr < CommentEnd);
356 if (TokenPtr == CommentEnd) {
357 formTextToken(
T, TokenPtr);
365 case '\\':
case '@':
case '&':
case '$':
366 case '#':
case '<':
case '>':
case '%':
367 case '\"':
case '.':
case ':':
370 if (
C ==
':' && TokenPtr != CommentEnd && *TokenPtr ==
':') {
374 StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
376 T.setText(UnescapedText);
381 if (!isCommandNameStartCharacter(*TokenPtr)) {
382 formTextToken(
T, TokenPtr);
386 TokenPtr = skipCommandName(TokenPtr, CommentEnd);
387 unsigned Length = TokenPtr - (BufferPtr + 1);
391 if (Length == 1 && TokenPtr[-1] ==
'f' && TokenPtr != CommentEnd) {
393 if (
C ==
'$' ||
C ==
'(' ||
C ==
')' ||
C ==
'[' ||
C ==
']' ||
394 C ==
'{' ||
C ==
'}') {
400 StringRef CommandName(BufferPtr + 1, Length);
405 StringRef CorrectedName = Info->
Name;
406 SourceLocation
Loc = getSourceLocation(BufferPtr);
407 SourceLocation EndLoc = getSourceLocation(TokenPtr);
408 SourceRange FullRange = SourceRange(
Loc, EndLoc);
409 SourceRange CommandRange(
Loc.getLocWithOffset(1), EndLoc);
410 Diag(
Loc, diag::warn_correct_comment_command_name)
411 << FullRange << CommandName << CorrectedName
415 T.setUnknownCommandName(CommandName);
416 Diag(
T.getLocation(), diag::warn_unknown_comment_command_name)
417 << SourceRange(
T.getLocation(),
T.getEndLocation());
421 if (Info->IsVerbatimBlockCommand) {
422 setupAndLexVerbatimBlock(
T, TokenPtr, *BufferPtr, Info);
425 if (Info->IsVerbatimLineCommand) {
426 setupAndLexVerbatimLine(
T, TokenPtr, Info);
429 formTokenWithChars(
T, TokenPtr, CommandKind);
430 T.setCommandID(Info->getID());
435 lexHTMLCharacterReference(
T);
440 if (TokenPtr == CommentEnd) {
441 formTextToken(
T, TokenPtr);
444 const char C = *TokenPtr;
445 if (isHTMLIdentifierStartingCharacter(
C))
446 setupAndLexHTMLStartTag(
T);
448 setupAndLexHTMLEndTag(
T);
450 formTextToken(
T, TokenPtr);
455 return HandleNonCommandToken();
459void Lexer::setupAndLexVerbatimBlock(Token &
T,
460 const char *TextBegin,
461 char Marker,
const CommandInfo *Info) {
462 assert(Info->IsVerbatimBlockCommand);
464 VerbatimBlockEndCommandName.clear();
465 VerbatimBlockEndCommandName.append(Marker ==
'\\' ?
"\\" :
"@");
466 VerbatimBlockEndCommandName.append(Info->EndCommandName);
469 T.setVerbatimBlockID(Info->getID());
474 if (BufferPtr != CommentEnd &&
477 State = LS_VerbatimBlockBody;
481 State = LS_VerbatimBlockFirstLine;
484void Lexer::lexVerbatimBlockFirstLine(Token &
T) {
486 assert(BufferPtr < CommentEnd);
492 const char *Newline = findNewline(BufferPtr, CommentEnd);
493 StringRef
Line(BufferPtr, Newline - BufferPtr);
496 size_t Pos =
Line.find(VerbatimBlockEndCommandName);
498 const char *NextLine;
499 if (Pos == StringRef::npos) {
503 }
else if (Pos == 0) {
505 const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
506 StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
513 TextEnd = BufferPtr + Pos;
522 StringRef
Text(BufferPtr, TextEnd - BufferPtr);
524 T.setVerbatimBlockText(
Text);
526 State = LS_VerbatimBlockBody;
529void Lexer::lexVerbatimBlockBody(Token &
T) {
530 assert(State == LS_VerbatimBlockBody);
532 if (CommentState == LCS_InsideCComment)
533 skipLineStartingDecorations();
535 if (BufferPtr == CommentEnd) {
537 T.setVerbatimBlockText(
"");
541 lexVerbatimBlockFirstLine(
T);
544void Lexer::setupAndLexVerbatimLine(Token &
T,
const char *TextBegin,
545 const CommandInfo *Info) {
546 assert(Info->IsVerbatimLineCommand);
548 T.setVerbatimLineID(Info->getID());
550 State = LS_VerbatimLineText;
553void Lexer::lexVerbatimLineText(Token &
T) {
554 assert(State == LS_VerbatimLineText);
557 const char *Newline = findNewline(BufferPtr, CommentEnd);
558 StringRef
Text(BufferPtr, Newline - BufferPtr);
560 T.setVerbatimLineText(
Text);
565void Lexer::lexHTMLCharacterReference(Token &
T) {
566 const char *TokenPtr = BufferPtr;
567 assert(*TokenPtr ==
'&');
569 if (TokenPtr == CommentEnd) {
570 formTextToken(
T, TokenPtr);
575 bool isDecimal =
false;
579 TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
581 }
else if (
C ==
'#') {
583 if (TokenPtr == CommentEnd) {
584 formTextToken(
T, TokenPtr);
590 TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
592 }
else if (
C ==
'x' ||
C ==
'X') {
595 TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
597 formTextToken(
T, TokenPtr);
601 formTextToken(
T, TokenPtr);
604 if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
606 formTextToken(
T, TokenPtr);
609 StringRef Name(NamePtr, TokenPtr - NamePtr);
613 Resolved = resolveHTMLNamedCharacterReference(Name);
615 Resolved = resolveHTMLDecimalCharacterReference(Name);
617 Resolved = resolveHTMLHexCharacterReference(Name);
619 if (Resolved.empty()) {
620 formTextToken(
T, TokenPtr);
627void Lexer::setupAndLexHTMLStartTag(Token &
T) {
628 assert(BufferPtr[0] ==
'<' &&
629 isHTMLIdentifierStartingCharacter(BufferPtr[1]));
630 const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
631 StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
632 if (!isHTMLTagName(Name)) {
633 formTextToken(
T, TagNameEnd);
638 T.setHTMLTagStartName(Name);
642 const char C = *BufferPtr;
643 if (BufferPtr != CommentEnd &&
644 (
C ==
'>' ||
C ==
'/' || isHTMLIdentifierStartingCharacter(
C)))
645 State = LS_HTMLStartTag;
648void Lexer::lexHTMLStartTag(Token &
T) {
649 assert(State == LS_HTMLStartTag);
651 const char *TokenPtr = BufferPtr;
653 if (isHTMLIdentifierCharacter(
C)) {
654 TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
655 StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
657 T.setHTMLIdent(Ident);
666 const char *OpenQuote = TokenPtr;
667 TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
668 const char *ClosingQuote = TokenPtr;
669 if (TokenPtr != CommentEnd)
672 T.setHTMLQuotedString(StringRef(OpenQuote + 1,
673 ClosingQuote - (OpenQuote + 1)));
683 if (TokenPtr != CommentEnd && *TokenPtr ==
'>') {
687 formTextToken(
T, TokenPtr);
697 if (BufferPtr == CommentEnd) {
703 if (!isHTMLIdentifierStartingCharacter(
C) &&
704 C !=
'=' &&
C !=
'\"' &&
C !=
'\'' &&
C !=
'>' &&
C !=
'/') {
710void Lexer::setupAndLexHTMLEndTag(Token &
T) {
711 assert(BufferPtr[0] ==
'<' && BufferPtr[1] ==
'/');
713 const char *TagNameBegin =
skipWhitespace(BufferPtr + 2, CommentEnd);
714 const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
715 StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
716 if (!isHTMLTagName(Name)) {
717 formTextToken(
T, TagNameEnd);
724 T.setHTMLTagEndName(Name);
726 if (BufferPtr != CommentEnd && *BufferPtr ==
'>')
727 State = LS_HTMLEndTag;
730void Lexer::lexHTMLEndTag(Token &
T) {
731 assert(BufferPtr != CommentEnd && *BufferPtr ==
'>');
739 const char *BufferStart,
const char *BufferEnd,
bool ParseCommands)
740 : Allocator(Allocator), Diags(Diags), Traits(Traits),
741 BufferStart(BufferStart), BufferEnd(BufferEnd), BufferPtr(BufferStart),
742 FileLoc(FileLoc), ParseCommands(ParseCommands),
743 CommentState(LCS_BeforeComment), State(LS_Normal) {}
747 switch (CommentState) {
748 case LCS_BeforeComment:
749 if (BufferPtr == BufferEnd) {
750 formTokenWithChars(
T, BufferPtr,
tok::eof);
754 assert(*BufferPtr ==
'/');
760 if (BufferPtr != BufferEnd) {
765 const char C = *BufferPtr;
766 if (
C ==
'/' ||
C ==
'!')
773 if (BufferPtr != BufferEnd && *BufferPtr ==
'<')
776 CommentState = LCS_InsideBCPLComment;
777 if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
779 CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
786 const char C = *BufferPtr;
787 if ((
C ==
'*' && *(BufferPtr + 1) !=
'/') ||
C ==
'!')
791 if (BufferPtr != BufferEnd && *BufferPtr ==
'<')
794 CommentState = LCS_InsideCComment;
796 CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
800 llvm_unreachable(
"second character of comment should be '/' or '*'");
803 case LCS_BetweenComments: {
806 const char *EndWhitespace = BufferPtr;
807 while(EndWhitespace != BufferEnd && *EndWhitespace !=
'/')
816 CommentState = LCS_BeforeComment;
820 case LCS_InsideBCPLComment:
821 case LCS_InsideCComment:
822 if (BufferPtr != CommentEnd) {
827 if (CommentState == LCS_InsideCComment) {
828 assert(BufferPtr[0] ==
'*' && BufferPtr[1] ==
'/');
830 assert(BufferPtr <= BufferEnd);
836 CommentState = LCS_BetweenComments;
840 CommentState = LCS_BetweenComments;
852 bool InvalidTemp =
false;
857 const char *
Begin =
File.data() + LocInfo.second;
enum clang::sema::@1655::IndirectLocalPathEntry::EntryKind Kind
static bool isNamed(const NamedDecl *ND, const char(&Str)[Len])
static unsigned skipNewline(const char *&First, const char *End)
static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length)
Skip over whitespace in the string, starting at the given index.
Concrete class used by the front-end to report problems and issues.
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
Encodes a location in the source.
void print(raw_ostream &OS, const SourceManager &SM) const
This class handles loading and caching of source files into memory.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isLetter(unsigned char c)
Return true if this character is an ASCII letter: [a-zA-Z].
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isDigit(unsigned char c)
Return true if this character is an ASCII digit: [0-9].
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
LLVM_READONLY bool isHexDigit(unsigned char c)
Return true if this character is an ASCII hex digit: [0-9a-fA-F].
const FunctionProtoType * T