clang 20.0.0git
MacroArgs.cpp
Go to the documentation of this file.
1//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MacroArgs interface.
10//
11//===----------------------------------------------------------------------===//
12
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
19
20using namespace clang;
21
22/// MacroArgs ctor function - This destroys the vector passed in.
24 ArrayRef<Token> UnexpArgTokens,
25 bool VarargsElided, Preprocessor &PP) {
26 assert(MI->isFunctionLike() &&
27 "Can't have args for an object-like macro!");
28 MacroArgs **ResultEnt = nullptr;
29 unsigned ClosestMatch = ~0U;
30
31 // See if we have an entry with a big enough argument list to reuse on the
32 // free list. If so, reuse it.
33 for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
34 Entry = &(*Entry)->ArgCache) {
35 if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
36 (*Entry)->NumUnexpArgTokens < ClosestMatch) {
37 ResultEnt = Entry;
38
39 // If we have an exact match, use it.
40 if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
41 break;
42 // Otherwise, use the best fit.
43 ClosestMatch = (*Entry)->NumUnexpArgTokens;
44 }
45 }
47 if (!ResultEnt) {
48 // Allocate memory for a MacroArgs object with the lexer tokens at the end,
49 // and construct the MacroArgs object.
50 Result = new (
51 llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
52 MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
53 } else {
54 Result = *ResultEnt;
55 // Unlink this node from the preprocessors singly linked list.
56 *ResultEnt = Result->ArgCache;
57 Result->NumUnexpArgTokens = UnexpArgTokens.size();
58 Result->VarargsElided = VarargsElided;
59 Result->NumMacroArgs = MI->getNumParams();
60 }
61
62 // Copy the actual unexpanded tokens to immediately after the result ptr.
63 if (!UnexpArgTokens.empty()) {
64 static_assert(std::is_trivial_v<Token>,
65 "assume trivial copyability if copying into the "
66 "uninitialized array (as opposed to reusing a cached "
67 "MacroArgs)");
68 std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
69 Result->getTrailingObjects<Token>());
70 }
71
72 return Result;
73}
74
75/// destroy - Destroy and deallocate the memory for this object.
76///
78 // Don't clear PreExpArgTokens, just clear the entries. Clearing the entries
79 // would deallocate the element vectors.
80 for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
81 PreExpArgTokens[i].clear();
82
83 // Add this to the preprocessor's free list.
84 ArgCache = PP.MacroArgCache;
85 PP.MacroArgCache = this;
86}
87
88/// deallocate - This should only be called by the Preprocessor when managing
89/// its freelist.
91 MacroArgs *Next = ArgCache;
92
93 // Run the dtor to deallocate the vectors.
94 this->~MacroArgs();
95 // Release the memory for the object.
96 static_assert(std::is_trivially_destructible_v<Token>,
97 "assume trivially destructible and forego destructors");
98 free(this);
99
100 return Next;
101}
102
103
104/// getArgLength - Given a pointer to an expanded or unexpanded argument,
105/// return the number of tokens, not counting the EOF, that make up the
106/// argument.
107unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
108 unsigned NumArgTokens = 0;
109 for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
110 ++NumArgTokens;
111 return NumArgTokens;
112}
113
114
115/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
116///
117const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
118
119 assert(Arg < getNumMacroArguments() && "Invalid arg #");
120 // The unexpanded argument tokens start immediately after the MacroArgs object
121 // in memory.
122 const Token *Start = getTrailingObjects<Token>();
123 const Token *Result = Start;
124
125 // Scan to find Arg.
126 for (; Arg; ++Result) {
127 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
128 if (Result->is(tok::eof))
129 --Arg;
130 }
131 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
132 return Result;
133}
134
136 Preprocessor &PP) {
137 if (!MI->isVariadic())
138 return false;
139 const int VariadicArgIndex = getNumMacroArguments() - 1;
140 return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof);
141}
142
143/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
144/// by pre-expansion, return false. Otherwise, conservatively return true.
146 Preprocessor &PP) const {
147 // If there are no identifiers in the argument list, or if the identifiers are
148 // known to not be macros, pre-expansion won't modify it.
149 for (; ArgTok->isNot(tok::eof); ++ArgTok)
150 if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
151 if (II->hasMacroDefinition())
152 // Return true even though the macro could be a function-like macro
153 // without a following '(' token, or could be disabled, or not visible.
154 return true;
155 return false;
156}
157
158/// getPreExpArgument - Return the pre-expanded form of the specified
159/// argument.
160const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,
161 Preprocessor &PP) {
162 assert(Arg < getNumMacroArguments() && "Invalid argument number!");
163
164 // If we have already computed this, return it.
165 if (PreExpArgTokens.size() < getNumMacroArguments())
166 PreExpArgTokens.resize(getNumMacroArguments());
167
168 std::vector<Token> &Result = PreExpArgTokens[Arg];
169 if (!Result.empty()) return Result;
170
171 SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);
172
173 const Token *AT = getUnexpArgument(Arg);
174 unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
175
176 // Otherwise, we have to pre-expand this argument, populating Result. To do
177 // this, we set up a fake TokenLexer to lex from the unexpanded argument
178 // list. With this installed, we lex expanded tokens until we hit the EOF
179 // token at the end of the unexp list.
180 PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
181 false /*owns tokens*/, false /*is reinject*/);
182
183 // Lex all of the macro-expanded tokens into Result.
184 do {
185 Result.push_back(Token());
186 Token &Tok = Result.back();
187 PP.Lex(Tok);
188 } while (Result.back().isNot(tok::eof));
189
190 // Pop the token stream off the top of the stack. We know that the internal
191 // pointer inside of it is to the "end" of the token stream, but the stack
192 // will not otherwise be popped until the next token is lexed. The problem is
193 // that the token may be lexed sometime after the vector of tokens itself is
194 // destroyed, which would be badness.
195 if (PP.InCachingLexMode())
196 PP.ExitCachingLexMode();
198 return Result;
199}
200
201
202/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
203/// tokens into the literal string token that should be produced by the C #
204/// preprocessor operator. If Charify is true, then it should be turned into
205/// a character literal for the Microsoft charize (#@) extension.
206///
208 Preprocessor &PP, bool Charify,
209 SourceLocation ExpansionLocStart,
210 SourceLocation ExpansionLocEnd) {
211 Token Tok;
212 Tok.startToken();
213 Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
214
215 const Token *ArgTokStart = ArgToks;
216
217 // Stringify all the tokens.
219 Result += "\"";
220
221 bool isFirst = true;
222 for (; ArgToks->isNot(tok::eof); ++ArgToks) {
223 const Token &Tok = *ArgToks;
224 if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
225 Result += ' ';
226 isFirst = false;
227
228 // If this is a string or character constant, escape the token as specified
229 // by 6.10.3.2p2.
230 if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
231 Tok.is(tok::char_constant) || // 'x'
232 Tok.is(tok::wide_char_constant) || // L'x'.
233 Tok.is(tok::utf8_char_constant) || // u8'x'.
234 Tok.is(tok::utf16_char_constant) || // u'x'.
235 Tok.is(tok::utf32_char_constant)) { // U'x'.
236 bool Invalid = false;
237 std::string TokStr = PP.getSpelling(Tok, &Invalid);
238 if (!Invalid) {
239 std::string Str = Lexer::Stringify(TokStr);
240 Result.append(Str.begin(), Str.end());
241 }
242 } else if (Tok.is(tok::code_completion)) {
244 } else {
245 // Otherwise, just append the token. Do some gymnastics to get the token
246 // in place and avoid copies where possible.
247 unsigned CurStrLen = Result.size();
248 Result.resize(CurStrLen+Tok.getLength());
249 const char *BufPtr = Result.data() + CurStrLen;
250 bool Invalid = false;
251 unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);
252
253 if (!Invalid) {
254 // If getSpelling returned a pointer to an already uniqued version of
255 // the string instead of filling in BufPtr, memcpy it onto our string.
256 if (ActualTokLen && BufPtr != &Result[CurStrLen])
257 memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
258
259 // If the token was dirty, the spelling may be shorter than the token.
260 if (ActualTokLen != Tok.getLength())
261 Result.resize(CurStrLen+ActualTokLen);
262 }
263 }
264 }
265
266 // If the last character of the string is a \, and if it isn't escaped, this
267 // is an invalid string literal, diagnose it as specified in C99.
268 if (Result.back() == '\\') {
269 // Count the number of consecutive \ characters. If even, then they are
270 // just escaped backslashes, otherwise it's an error.
271 unsigned FirstNonSlash = Result.size()-2;
272 // Guaranteed to find the starting " if nothing else.
273 while (Result[FirstNonSlash] == '\\')
274 --FirstNonSlash;
275 if ((Result.size()-1-FirstNonSlash) & 1) {
276 // Diagnose errors for things like: #define F(X) #X / F(\‍)
277 PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
278 Result.pop_back(); // remove one of the \'s.
279 }
280 }
281 Result += '"';
282
283 // If this is the charify operation and the result is not a legal character
284 // constant, diagnose it.
285 if (Charify) {
286 // First step, turn double quotes into single quotes:
287 Result[0] = '\'';
288 Result[Result.size()-1] = '\'';
289
290 // Check for bogus character.
291 bool isBad = false;
292 if (Result.size() == 3)
293 isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
294 else
295 isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
296
297 if (isBad) {
298 PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
299 Result = "' '"; // Use something arbitrary, but legal.
300 }
301 }
302
303 PP.CreateString(Result, Tok,
304 ExpansionLocStart, ExpansionLocEnd);
305 return Tok;
306}
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Preprocessor interface.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
One of these records is kept for each identifier that is lexed.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Definition: Lexer.cpp:309
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
const Token * getUnexpArgument(unsigned Arg) const
getUnexpArgument - Return a pointer to the first token of the unexpanded token list for the specified...
Definition: MacroArgs.cpp:117
MacroArgs * deallocate()
deallocate - This should only be called by the Preprocessor when managing its freelist.
Definition: MacroArgs.cpp:90
const std::vector< Token > & getPreExpArgument(unsigned Arg, Preprocessor &PP)
getPreExpArgument - Return the pre-expanded form of the specified argument.
Definition: MacroArgs.cpp:160
static MacroArgs * create(const MacroInfo *MI, ArrayRef< Token > UnexpArgTokens, bool VarargsElided, Preprocessor &PP)
MacroArgs ctor function - Create a new MacroArgs object with the specified macro and argument info.
Definition: MacroArgs.cpp:23
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens,...
Definition: MacroArgs.cpp:107
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
ArgNeedsPreexpansion - If we can prove that the argument won't be affected by pre-expansion,...
Definition: MacroArgs.cpp:145
bool invokedWithVariadicArgument(const MacroInfo *const MI, Preprocessor &PP)
Returns true if the macro was defined with a variadic (ellipsis) parameter AND was invoked with at le...
Definition: MacroArgs.cpp:135
unsigned getNumMacroArguments() const
getNumMacroArguments - Return the number of arguments the invoked macro expects.
Definition: MacroArgs.h:95
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string...
Definition: MacroArgs.cpp:207
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Definition: MacroArgs.cpp:77
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
bool isFunctionLike() const
Definition: MacroInfo.h:201
unsigned getNumParams() const
Definition: MacroInfo.h:184
bool isVariadic() const
Definition: MacroInfo.h:209
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:138
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
void Lex(Token &Result)
Lex the next token for this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
void RemoveTopOfLexerStack()
Pop the current lexer/macro exp off the top of the lexer stack.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
unsigned getLength() const
Definition: Token.h:135
void setKind(tok::TokenKind K)
Definition: Token.h:95
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
tok::TokenKind getKind() const
Definition: Token.h:94
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:276
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:280
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
Definition: TokenKinds.h:89
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.