clang 20.0.0git
WhitespaceManager.h
Go to the documentation of this file.
1//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// WhitespaceManager class manages whitespace around tokens and their
11/// replacements.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17
18#include "TokenAnnotator.h"
20
21namespace clang {
22namespace format {
23
24/// Manages the whitespaces around tokens and their replacements.
25///
26/// This includes special handling for certain constructs, e.g. the alignment of
27/// trailing line comments.
28///
29/// To guarantee correctness of alignment operations, the \c WhitespaceManager
30/// must be informed about every token in the source file; for each token, there
31/// must be exactly one call to either \c replaceWhitespace or
32/// \c addUntouchableToken.
33///
34/// There may be multiple calls to \c breakToken for a given token.
36public:
/// Constructs a manager bound to \p SourceMgr and \p Style.
///
/// Both \p SourceMgr and \p Style are stored as references (see the member
/// declarations at the end of the class), so the referenced objects must
/// outlive this \c WhitespaceManager. \p UseCRLF is copied and is later
/// reported by \c useCRLF().
37 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38 bool UseCRLF)
39 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40
/// Returns the \c UseCRLF flag that was passed to the constructor.
41 bool useCRLF() const { return UseCRLF; }
42
43 /// Infers whether the input is using CRLF.
44 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45
46 /// Replaces the whitespace in front of \p Tok. Only call once for
47 /// each \c FormatToken.
48 ///
49 /// \p StartOfTokenColumn is the column at which the token will start after
50 /// this replacement. It is needed for determining how \p Spaces is turned
51 /// into tabs and spaces for some format styles.
52 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
53 unsigned StartOfTokenColumn, bool IsAligned = false,
54 bool InPPDirective = false);
55
56 /// Adds information about an unchangeable token's whitespace.
57 ///
58 /// Needs to be called for every token for which \c replaceWhitespace
59 /// was not called.
60 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
61
62 llvm::Error addReplacement(const tooling::Replacement &Replacement);
63
64 /// Inserts or replaces whitespace in the middle of a token.
65 ///
66 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
67 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
68 /// characters.
69 ///
70 /// Note: \p Spaces can be negative to retain information about initial
71 /// relative column offset between a line of a block comment and the start of
72 /// the comment. This negative offset may be compensated by trailing comment
73 /// alignment here. In all other cases negative \p Spaces will be truncated to
74 /// 0.
75 ///
76 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
77 /// used to align backslashes correctly.
78 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
79 unsigned ReplaceChars,
80 StringRef PreviousPostfix,
81 StringRef CurrentPrefix, bool InPPDirective,
82 unsigned Newlines, int Spaces);
83
84 /// Returns all the \c Replacements created during formatting.
86
87 /// Represents a change before a token, a break inside a token,
88 /// or the layout of an unchanged token (or whitespace within).
89 struct Change {
90 /// Functor to sort changes in original source order.
92 public:
93 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
94 bool operator()(const Change &C1, const Change &C2) const;
95
96 private:
97 const SourceManager &SourceMgr;
98 };
99
100 /// Creates a \c Change.
101 ///
102 /// The generated \c Change will replace the characters at
103 /// \p OriginalWhitespaceRange with a concatenation of
104 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
105 /// and \p CurrentLinePrefix.
106 ///
107 /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
108 /// trailing comments and escaped newlines.
111 unsigned StartOfTokenColumn, unsigned NewlinesBefore,
112 StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
114
115 // The kind of the token whose whitespace this change replaces, or in which
116 // this change inserts whitespace.
117 // FIXME: Currently this is not set correctly for breaks inside comments, as
118 // the \c BreakableToken is still doing its own alignment.
120
122 // Changes might be in the middle of a token, so we cannot just keep the
123 // FormatToken around to query its information.
128 std::string CurrentLinePrefix;
131
132 // The number of spaces in front of the token or broken part of the token.
133 // This will be adapted when aligning tokens.
134 // Can be negative to retain information about the initial relative offset
135 // of the lines in a block comment. This is used when aligning trailing
136 // comments. Uncompensated negative offset is truncated to 0.
138
139 // If this change is inside of a token but not at the start of the token or
140 // directly after a newline.
142
143 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
144 // \c EscapedNewlineColumn will be calculated in
145 // \c calculateLineBreakInformation.
147 unsigned TokenLength;
150
151 // These fields are used to retain correct relative line indentation in a
152 // block comment when aligning trailing comments.
153 //
154 // If this Change represents a continuation of a block comment,
155 // \c StartOfBlockComment is pointer to the first Change in the block
156 // comment. \c IndentationOffset is a relative column offset to this
157 // change, so that the correct column can be reconstructed at the end of
158 // the alignment process.
161
162 // Depth of conditionals. Computed from tracking fake parenthesis, except
163 // it does not increase the indent for "chained" conditionals.
165
166 // A combination of indent, nesting and conditionals levels, which are used
167 // in tandem to compute lexical scope, for the purposes of deciding
168 // when to stop consecutive alignment runs.
169 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
170 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
172 }
173 };
174
175private:
/// Describes one cell of an array initializer that is being aligned.
///
/// \c Index and \c EndIndex are passed as the start and end positions to
/// \c calculateCellWidth, and \c Index is also used to index \c Changes.
/// \c NextColumnElement chains to a further cell that the width helpers walk
/// when measuring a column; it is null at the end of the chain.
struct CellDescription {
  unsigned Index = 0;
  unsigned Cell = 0;
  unsigned EndIndex = 0;
  bool HasSplit = false;
  CellDescription *NextColumnElement = nullptr;

  /// Two descriptions are equal when \c Index, \c Cell and \c EndIndex all
  /// match. \c HasSplit and \c NextColumnElement do not take part in the
  /// comparison.
  constexpr bool operator==(const CellDescription &Other) const {
    if (Index != Other.Index)
      return false;
    if (Cell != Other.Cell)
      return false;
    return EndIndex == Other.EndIndex;
  }

  /// Exact negation of \c operator==.
  constexpr bool operator!=(const CellDescription &Other) const {
    return Index != Other.Index || Cell != Other.Cell ||
           EndIndex != Other.EndIndex;
  }
};
191
192 struct CellDescriptions {
193 SmallVector<CellDescription> Cells;
194 SmallVector<unsigned> CellCounts;
195 unsigned InitialSpaces = 0;
196
197 // Determine if every row in the array
198 // has the same number of columns.
199 bool isRectangular() const {
200 if (CellCounts.size() < 2)
201 return false;
202
203 for (auto NumberOfColumns : CellCounts)
204 if (NumberOfColumns != CellCounts[0])
205 return false;
206 return true;
207 }
208 };
209
210 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
211 /// or token parts in a line and \c PreviousEndOfTokenColumn and
212 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
213 void calculateLineBreakInformation();
214
215 /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.
216 void alignConsecutiveMacros();
217
218 /// Align consecutive assignments over all \c Changes.
219 void alignConsecutiveAssignments();
220
221 /// Align consecutive bitfields over all \c Changes.
222 void alignConsecutiveBitFields();
223
224 /// Align consecutive colons. For bitfields, TableGen DAGArgs and definitions.
225 void
226 alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
227 TokenType Type);
228
229 /// Align consecutive declarations over all \c Changes.
230 void alignConsecutiveDeclarations();
231
232 /// Align chained conditionals over all \c Changes.
233 void alignChainedConditionals();
234
235 /// Align consecutive short case statements over all \c Changes.
236 void alignConsecutiveShortCaseStatements(bool IsExpr);
237
238 /// Align consecutive TableGen DAGArg colon over all \c Changes.
239 void alignConsecutiveTableGenBreakingDAGArgColons();
240
241 /// Align consecutive TableGen cond operator colon over all \c Changes.
242 void alignConsecutiveTableGenCondOperatorColons();
243
244 /// Align consecutive TableGen definitions over all \c Changes.
245 void alignConsecutiveTableGenDefinitions();
246
247 /// Align trailing comments over all \c Changes.
248 void alignTrailingComments();
249
250 /// Align trailing comments from change \p Start to change \p End at
251 /// the specified \p Column.
252 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
253
254 /// Align escaped newlines over all \c Changes.
255 void alignEscapedNewlines();
256
257 /// Align escaped newlines from change \p Start to change \p End at
258 /// the specified \p Column.
259 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
260
261 /// Align Array Initializers over all \c Changes.
262 void alignArrayInitializers();
263
264 /// Align Array Initializers from change \p Start to change \p End.
266 void alignArrayInitializers(unsigned Start, unsigned End);
267
268 /// Align Array Initializers being careful to right justify the columns
269 /// as described by \p CellDescs.
270 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
271
272 /// Align Array Initializers being careful to left justify the columns
273 /// as described by \p CellDescs.
274 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
275
276 /// Calculate the cell width between two indexes.
277 unsigned calculateCellWidth(unsigned Start, unsigned End,
278 bool WithSpaces = false) const;
279
280 /// Get a set of fully specified CellDescriptions between \p Start and
281 /// \p End of the change list.
282 CellDescriptions getCells(unsigned Start, unsigned End);
283
284 /// Does this \p Cell contain a split element?
285 static bool isSplitCell(const CellDescription &Cell);
286
287 /// Get the width of the preceding cells from \p Start to \p End.
288 template <typename I>
289 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
290 auto NetWidth = InitialSpaces;
291 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
292 // If we broke the line the initial spaces are already
293 // accounted for.
294 assert(PrevIter->Index < Changes.size());
295 if (Changes[PrevIter->Index].NewlinesBefore > 0)
296 NetWidth = 0;
297 NetWidth +=
298 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
299 }
300 return NetWidth;
301 }
302
303 /// Get the maximum width of a cell in a sequence of columns.
304 template <typename I>
305 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
306 unsigned CellWidth =
307 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
308 if (Changes[CellIter->Index].NewlinesBefore == 0)
309 CellWidth += NetWidth;
310 for (const auto *Next = CellIter->NextColumnElement; Next;
311 Next = Next->NextColumnElement) {
312 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
313 if (Changes[Next->Index].NewlinesBefore == 0)
314 ThisWidth += NetWidth;
315 CellWidth = std::max(CellWidth, ThisWidth);
316 }
317 return CellWidth;
318 }
319
320 /// Get The maximum width of all columns to a given cell.
321 template <typename I>
322 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
323 unsigned InitialSpaces, unsigned CellCount,
324 unsigned MaxRowCount) const {
325 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
326 auto RowCount = 1U;
327 auto Offset = std::distance(CellStart, CellStop);
328 for (const auto *Next = CellStop->NextColumnElement; Next;
329 Next = Next->NextColumnElement) {
330 if (RowCount >= MaxRowCount)
331 break;
332 auto Start = (CellStart + RowCount * CellCount);
333 auto End = Start + Offset;
334 MaxNetWidth =
335 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
336 ++RowCount;
337 }
338 return MaxNetWidth;
339 }
340
341 /// Align a split cell with a newline to the first element in the cell.
342 void alignToStartOfCell(unsigned Start, unsigned End);
343
344 /// Link the Cell pointers in the list of Cells.
345 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
346
347 /// Fill \c Replaces with the replacements for all effective changes.
348 void generateChanges();
349
350 /// Stores \p Text as the replacement for the whitespace in \p Range.
351 void storeReplacement(SourceRange Range, StringRef Text);
352 void appendNewlineText(std::string &Text, const Change &C);
353 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
354 unsigned PreviousEndOfTokenColumn,
355 unsigned EscapedNewlineColumn);
356 void appendIndentText(std::string &Text, unsigned IndentLevel,
357 unsigned Spaces, unsigned WhitespaceStartColumn,
358 bool IsAligned);
359 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
360 unsigned Indentation);
361
362 SmallVector<Change, 16> Changes;
363 const SourceManager &SourceMgr;
364 tooling::Replacements Replaces;
365 const FormatStyle &Style;
366 bool UseCRLF;
367};
368
369} // namespace format
370} // namespace clang
371
372#endif
StringRef Text
Definition: Format.cpp:3033
SourceRange Range
Definition: SemaObjC.cpp:758
Defines the SourceManager interface.
This file implements a token annotator, i.e.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Functor to sort changes in original source order.
bool operator()(const Change &C1, const Change &C2) const
Manages the whitespaces around tokens and their replacements.
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
llvm::Error addReplacement(const tooling::Replacement &Replacement)
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned=false, bool InPPDirective=false)
Replaces the whitespace in front of Tok.
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, bool UseCRLF)
A text replacement.
Definition: Replacement.h:83
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:209
The JSON file list parser is used to communicate input to InstallAPI.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:204
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:294
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:517
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:520
Represents a change before a token, a break inside a token, or the layout of an unchanged token (or w...
std::tuple< unsigned, unsigned, unsigned > indentAndNestingLevel() const