clang 20.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
25 // Contains public/private/protected followed by TT_InheritanceColon.
28 LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
30 LT_ObjCProperty, // An @property line.
36};
37
39 // Contained in child block.
41 // Contained in class declaration/definition.
43 // Contained within other scope block (function, loop, if/else, etc).
45};
46
48public:
50 : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
62 assert(!Line.Tokens.empty());
63
64 // Calculate Next and Previous for all tokens. Note that we must overwrite
65 // Next and Previous for every token, as previous formatting runs might have
66 // left them in a different state.
67 First->Previous = nullptr;
68 FormatToken *Current = First;
69 addChildren(Line.Tokens.front(), Current);
70 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
71 if (Node.Tok->MacroParent)
72 ContainsMacroCall = true;
73 Current->Next = Node.Tok;
74 Node.Tok->Previous = Current;
75 Current = Current->Next;
76 addChildren(Node, Current);
77 // FIXME: if we add children, previous will point to the token before
78 // the children; changing this requires significant changes across
79 // clang-format.
80 }
81 Last = Current;
82 Last->Next = nullptr;
83 }
84
86 Current->Children.clear();
87 for (const auto &Child : Node.Children) {
88 Children.push_back(new AnnotatedLine(Child));
89 if (Children.back()->ContainsMacroCall)
90 ContainsMacroCall = true;
91 Current->Children.push_back(Children.back());
92 }
93 }
94
95 size_t size() const { // Number of lines in the tree rooted at this line.
96 size_t Size = 1; // Counts this line itself.
97 for (const auto *Child : Children)
98 Size += Child->size(); // Adds each child together with its own descendants.
99 return Size;
100 }
101
103 for (AnnotatedLine *Child : Children)
104 delete Child;
105 FormatToken *Current = First;
106 while (Current) {
107 Current->Children.clear();
108 Current->Role.reset();
109 Current = Current->Next;
110 }
111 }
112
113 bool isComment() const { // True if the line consists solely of comment tokens.
114 return First && First->is(tok::comment) && !First->getNextNonComment(); // A null First yields false.
115 }
116
117 /// \c true if this line starts with the given tokens in order, ignoring
118 /// comments. Always \c false when \c First is null.
119 template <typename... Ts> bool startsWith(Ts... Tokens) const {
120 return First && First->startsSequence(Tokens...);
121 }
122
123 /// \c true if this line ends with the given tokens in reversed order,
124 /// ignoring comments.
125 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
126 /// this line is like "... T3 T2 T1". Always \c false when \c Last is null.
127 template <typename... Ts> bool endsWith(Ts... Tokens) const {
128 return Last && Last->endsSequence(Tokens...);
129 }
130
131 /// \c true if this line looks like a function definition instead of a
132 /// function declaration. Asserts MightBeFunctionDecl.
134 assert(MightBeFunctionDecl);
135 // Try to determine whether the end of a stream of tokens belongs to the
136 // Definition or the Declaration of a function. It does this by looking for
137 // the ';' in foo(); a line that ends with a ';' is taken to be a
138 // Declaration, anything else a Definition. However, the line could end with
139 // foo(); /* comment */
140 // or
141 // foo(); // comment
142 // or
143 // foo() // comment
144 // endsWith() ignores the comment.
145 return !endsWith(tok::semi);
146 }
147
148 /// \c true if this line starts a namespace definition: it begins with \c namespace, a token of type \c TT_NamespaceMacro, \c inline \c namespace, or \c export \c namespace.
149 bool startsWithNamespace() const {
150 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
151 startsWith(tok::kw_inline, tok::kw_namespace) ||
152 startsWith(tok::kw_export, tok::kw_namespace);
153 }
154
156 assert(First);
157 return First->is(tok::comment) ? First->getNextNonComment() : First;
158 }
159
161 assert(Last);
162 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
163 }
164
167
169
171 unsigned Level;
172 unsigned PPLevel;
181
182 /// \c True if this line contains a macro call for which an expansion exists.
183 bool ContainsMacroCall = false;
184
185 /// \c True if calculateFormattingInformation() has been called on this line.
186 bool Computed = false;
187
188 /// \c True if this line should be formatted, i.e. intersects directly or
189 /// indirectly with one of the input ranges.
191
192 /// \c True if the leading empty lines of this line intersect with one of the
193 /// input ranges.
195
196 /// \c True if one of this line's children intersects with an input range.
198
199 /// \c True if breaking after last attribute group in function return type.
201
202 /// \c True if this line should be indented by ContinuationIndent in addition
203 /// to the normal indention level.
205
207
208private:
209 // Disallow copying.
210 AnnotatedLine(const AnnotatedLine &) = delete;
211 void operator=(const AnnotatedLine &) = delete;
212};
213
214/// Determines extra information about the tokens comprising an
215/// \c UnwrappedLine.
217public:
218 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
219 : Style(Style), IsCpp(Style.isCpp()), // Style and Keywords are held as reference members, so both must outlive this object.
220 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
221 assert(IsCpp == LangOpts.CXXOperatorNames); // IsCpp and LangOpts are both derived from Style and are expected to agree.
222 }
223
224 /// Adapts the indent levels of comment lines to the indent of the
225 /// subsequent line.
226 // FIXME: Can/should this be done in the UnwrappedLineParser?
228
231
232private:
233 /// Calculate the penalty for splitting before \c Tok.
234 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
235 bool InFunctionDecl) const;
236
237 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
238
239 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
240 const FormatToken &Right) const;
241
242 bool spaceRequiredBefore(const AnnotatedLine &Line,
243 const FormatToken &Right) const;
244
245 bool mustBreakBefore(const AnnotatedLine &Line,
246 const FormatToken &Right) const;
247
248 bool canBreakBefore(const AnnotatedLine &Line,
249 const FormatToken &Right) const;
250
251 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
252
253 void printDebugInfo(const AnnotatedLine &Line) const;
254
255 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
256
257 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
258
259 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
260 FormatToken *CurrentToken,
261 unsigned Depth) const;
263 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
264
265 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
266 const FormatToken &PointerOrReference) const;
267
268 const FormatStyle &Style;
269
270 bool IsCpp;
271 LangOptions LangOpts;
272
273 const AdditionalKeywords &Keywords;
274
275 SmallVector<ScopeType> Scopes, MacroBodyScopes;
276};
277
278} // end namespace format
279} // end namespace clang
280
281#endif
DynTypedNode Node
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:499
The base class of the type hierarchy.
Definition: Type.h:1828
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
FormatToken * getFirstNonComment() const
bool Computed
True if calculateFormattingInformation() has been called on this line.
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
FormatToken * getLastNonComment() const
Determines extra information about the tokens comprising an UnwrappedLine.
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3911
@ LT_CommentAbovePPDirective
@ LT_ArrayOfStructInitializer
The JSON file list parser is used to communicate input to InstallAPI.
#define false
Definition: stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:1029
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:3661
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:294
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:646
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:845
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:837
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:566
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:609
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:563
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:657
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...