clang 20.0.0git
MacroExpander.cpp
Go to the documentation of this file.
1//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of MacroExpander, which handles macro
11/// configuration and expansion while formatting.
12///
13//===----------------------------------------------------------------------===//
14
15#include "Macros.h"
16
17#include "Encoding.h"
18#include "FormatToken.h"
19#include "FormatTokenLexer.h"
21#include "clang/Format/Format.h"
24#include "clang/Lex/Lexer.h"
28#include "llvm/ADT/StringSet.h"
29#include "llvm/Support/ErrorHandling.h"
30
31namespace clang {
32namespace format {
33
35 StringRef Name;
38
39 // Map from each argument's name to its position in the argument list.
40 // With "M(x, y) x + y":
41 // x -> 0
42 // y -> 1
43 llvm::StringMap<size_t> ArgMap;
44
45 bool ObjectLike = true;
46};
47
49public:
50 DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51 assert(!Tokens.empty());
52 Current = Tokens[0];
53 }
54
55 // Parse the token stream and return the corresponding Definition object.
56 // Returns an empty definition object with a null-Name on error.
58 if (Current->isNot(tok::identifier))
59 return {};
60 Def.Name = Current->TokenText;
61 nextToken();
62 if (Current->is(tok::l_paren)) {
63 Def.ObjectLike = false;
64 if (!parseParams())
65 return {};
66 }
67 if (!parseExpansion())
68 return {};
69
70 return Def;
71 }
72
73private:
74 bool parseParams() {
75 assert(Current->is(tok::l_paren));
76 nextToken();
77 while (Current->is(tok::identifier)) {
78 Def.Params.push_back(Current);
79 Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80 nextToken();
81 if (Current->isNot(tok::comma))
82 break;
83 nextToken();
84 }
85 if (Current->isNot(tok::r_paren))
86 return false;
87 nextToken();
88 return true;
89 }
90
91 bool parseExpansion() {
92 if (!Current->isOneOf(tok::equal, tok::eof))
93 return false;
94 if (Current->is(tok::equal))
95 nextToken();
96 parseTail();
97 return true;
98 }
99
100 void parseTail() {
101 while (Current->isNot(tok::eof)) {
102 Def.Body.push_back(Current);
103 nextToken();
104 }
105 Def.Body.push_back(Current);
106 }
107
108 void nextToken() {
109 if (Pos + 1 < Tokens.size())
110 ++Pos;
111 Current = Tokens[Pos];
112 Current->Finalized = true;
113 }
114
115 size_t Pos = 0;
116 FormatToken *Current = nullptr;
117 Definition Def;
118 ArrayRef<FormatToken *> Tokens;
119};
120
122 const std::vector<std::string> &Macros, SourceManager &SourceMgr,
123 const FormatStyle &Style,
124 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125 IdentifierTable &IdentTable)
126 : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127 IdentTable(IdentTable) {
128 for (const std::string &Macro : Macros)
129 parseDefinition(Macro);
130}
131
133
134void MacroExpander::parseDefinition(const std::string &Macro) {
135 Buffers.push_back(
136 llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
137 FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138 FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139 Allocator, IdentTable);
140 const auto Tokens = Lex.lex();
141 if (!Tokens.empty()) {
142 DefinitionParser Parser(Tokens);
143 auto Definition = Parser.parse();
144 if (Definition.ObjectLike) {
145 ObjectLike[Definition.Name] = std::move(Definition);
146 } else {
147 FunctionLike[Definition.Name][Definition.Params.size()] =
148 std::move(Definition);
149 }
150 }
151}
152
153bool MacroExpander::defined(StringRef Name) const {
154 return FunctionLike.contains(Name) || ObjectLike.contains(Name);
155}
156
157bool MacroExpander::objectLike(StringRef Name) const {
158 return ObjectLike.contains(Name);
159}
160
161bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const {
162 auto it = FunctionLike.find(Name);
163 return it != FunctionLike.end() && it->second.contains(Arity);
164}
165
168 std::optional<ArgsList> OptionalArgs) const {
169 if (OptionalArgs)
170 assert(hasArity(ID->TokenText, OptionalArgs->size()));
171 else
172 assert(objectLike(ID->TokenText));
173 const Definition &Def = OptionalArgs
174 ? FunctionLike.find(ID->TokenText)
175 ->second.find(OptionalArgs.value().size())
176 ->second
177 : ObjectLike.find(ID->TokenText)->second;
178 ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
180 // Expand each argument at most once.
181 llvm::StringSet<> ExpandedArgs;
182
183 // Adds the given token to Result.
184 auto pushToken = [&](FormatToken *Tok) {
185 Tok->MacroCtx->ExpandedFrom.push_back(ID);
186 Result.push_back(Tok);
187 };
188
189 // If Tok references a parameter, adds the corresponding argument to Result.
190 // Returns false if Tok does not reference a parameter.
191 auto expandArgument = [&](FormatToken *Tok) -> bool {
192 // If the current token references a parameter, expand the corresponding
193 // argument.
194 if (Tok->isNot(tok::identifier))
195 return false;
196 if (!ExpandedArgs.insert(Tok->TokenText).second)
197 return false;
198 auto I = Def.ArgMap.find(Tok->TokenText);
199 if (I == Def.ArgMap.end())
200 return false;
201 // If there are fewer arguments than referenced parameters, treat the
202 // parameter as empty.
203 // FIXME: Potentially fully abort the expansion instead.
204 if (I->getValue() >= Args.size())
205 return true;
206 for (FormatToken *Arg : Args[I->getValue()]) {
207 // A token can be part of a macro argument at multiple levels.
208 // For example, with "ID(x) x":
209 // in ID(ID(x)), 'x' is expanded first as argument to the inner
210 // ID, then again as argument to the outer ID. We keep the macro
211 // role the token had from the inner expansion.
212 if (!Arg->MacroCtx)
213 Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
214 pushToken(Arg);
215 }
216 return true;
217 };
218
219 // Expand the definition into Result.
220 for (FormatToken *Tok : Def.Body) {
221 if (expandArgument(Tok))
222 continue;
223 // Create a copy of the tokens from the macro body, i.e. were not provided
224 // by user code.
225 FormatToken *New = new (Allocator.Allocate()) FormatToken;
226 New->copyFrom(*Tok);
227 assert(!New->MacroCtx);
228 // Tokens that are not part of the user code are not formatted.
230 pushToken(New);
231 }
232 assert(Result.size() >= 1 && Result.back()->is(tok::eof));
233 if (Result.size() > 1) {
234 ++Result[0]->MacroCtx->StartOfExpansion;
235 ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
236 } else {
237 // If the macro expansion is empty, mark the start and end.
238 Result[0]->MacroCtx->StartOfExpansion = 1;
239 Result[0]->MacroCtx->EndOfExpansion = 1;
240 }
241 return Result;
242}
243
244} // namespace format
245} // namespace clang
static char ID
Definition: Arena.cpp:183
Contains functions for text encoding manipulation.
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
Various functions to configurably format source code.
This file contains the main building blocks of macro support in clang-format.
Defines the clang::Preprocessor interface.
Defines the clang::TokenKind enum and support functions.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:58
This class handles loading and caching of source files into memory.
FileID createFileID(FileEntryRef SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Create a new FileID that represents the specified file being #included from the specified IncludePosi...
DefinitionParser(ArrayRef< FormatToken * > Tokens)
bool objectLike(StringRef Name) const
Returns whether there is an object-like overload, i.e.
SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
ArrayRef< SmallVector< FormatToken *, 8 > > ArgsList
Definition: Macros.h:82
bool hasArity(StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
MacroExpander(const std::vector< std::string > &Macros, SourceManager &SourceMgr, const FormatStyle &Style, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
Construct a macro expander from a set of macro definitions.
bool defined(StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
@ MR_Hidden
The token was expanded from a macro definition, and is not visible as part of the macro call.
Definition: FormatToken.h:237
@ MR_ExpandedArg
The token was expanded from a macro argument when formatting the expanded token sequence.
Definition: FormatToken.h:231
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:294
bool isNot(T Kind) const
Definition: FormatToken.h:628
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:314
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:373
std::optional< MacroExpansion > MacroCtx
Definition: FormatToken.h:598
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:609
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:621
void copyFrom(const FormatToken &Tok)
Definition: FormatToken.h:894
SmallVector< FormatToken *, 8 > Params
SmallVector< FormatToken *, 8 > Body
Contains information on the token's role in a macro expansion.
Definition: FormatToken.h:267