clang 20.0.0git
ValistChecker.cpp
Go to the documentation of this file.
1//== ValistChecker.cpp - stdarg.h macro usage checker -----------*- C++ -*--==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines checkers which detect usage of uninitialized va_list values
10// and va_start calls with no matching va_end.
11//
12//===----------------------------------------------------------------------===//
13
21
22using namespace clang;
23using namespace ento;
24
25REGISTER_SET_WITH_PROGRAMSTATE(InitializedVALists, const MemRegion *)
26
27namespace {
28typedef SmallVector<const MemRegion *, 2> RegionVector;
29
30class ValistChecker : public Checker<check::PreCall, check::PreStmt<VAArgExpr>,
31 check::DeadSymbols> {
32 mutable std::unique_ptr<BugType> BT_leakedvalist, BT_uninitaccess;
33
34 struct VAListAccepter {
36 int VAListPos;
37 };
38 static const SmallVector<VAListAccepter, 15> VAListAccepters;
39 static const CallDescription VaStart, VaEnd, VaCopy;
40
41public:
42 enum CheckKind {
43 CK_Uninitialized,
44 CK_Unterminated,
45 CK_CopyToSelf,
46 CK_NumCheckKinds
47 };
48
49 bool ChecksEnabled[CK_NumCheckKinds] = {false};
50 CheckerNameRef CheckNames[CK_NumCheckKinds];
51
52 void checkPreStmt(const VAArgExpr *VAA, CheckerContext &C) const;
53 void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
54 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
55
56private:
57 const MemRegion *getVAListAsRegion(SVal SV, const Expr *VAExpr,
58 bool &IsSymbolic, CheckerContext &C) const;
59 const ExplodedNode *getStartCallSite(const ExplodedNode *N,
60 const MemRegion *Reg) const;
61
62 void reportUninitializedAccess(const MemRegion *VAList, StringRef Msg,
63 CheckerContext &C) const;
64 void reportLeakedVALists(const RegionVector &LeakedVALists, StringRef Msg1,
65 StringRef Msg2, CheckerContext &C, ExplodedNode *N,
66 bool ReportUninit = false) const;
67
68 void checkVAListStartCall(const CallEvent &Call, CheckerContext &C,
69 bool IsCopy) const;
70 void checkVAListEndCall(const CallEvent &Call, CheckerContext &C) const;
71
72 class ValistBugVisitor : public BugReporterVisitor {
73 public:
74 ValistBugVisitor(const MemRegion *Reg, bool IsLeak = false)
75 : Reg(Reg), IsLeak(IsLeak) {}
76 void Profile(llvm::FoldingSetNodeID &ID) const override {
77 static int X = 0;
78 ID.AddPointer(&X);
79 ID.AddPointer(Reg);
80 }
82 const ExplodedNode *EndPathNode,
83 PathSensitiveBugReport &BR) override {
84 if (!IsLeak)
85 return nullptr;
86
88 // Do not add the statement itself as a range in case of leak.
89 return std::make_shared<PathDiagnosticEventPiece>(L, BR.getDescription(),
90 false);
91 }
92 PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
94 PathSensitiveBugReport &BR) override;
95
96 private:
97 const MemRegion *Reg;
98 bool IsLeak;
99 };
100};
101
103 ValistChecker::VAListAccepters = {{{CDM::CLibrary, {"vfprintf"}, 3}, 2},
104 {{CDM::CLibrary, {"vfscanf"}, 3}, 2},
105 {{CDM::CLibrary, {"vprintf"}, 2}, 1},
106 {{CDM::CLibrary, {"vscanf"}, 2}, 1},
107 {{CDM::CLibrary, {"vsnprintf"}, 4}, 3},
108 {{CDM::CLibrary, {"vsprintf"}, 3}, 2},
109 {{CDM::CLibrary, {"vsscanf"}, 3}, 2},
110 {{CDM::CLibrary, {"vfwprintf"}, 3}, 2},
111 {{CDM::CLibrary, {"vfwscanf"}, 3}, 2},
112 {{CDM::CLibrary, {"vwprintf"}, 2}, 1},
113 {{CDM::CLibrary, {"vwscanf"}, 2}, 1},
114 {{CDM::CLibrary, {"vswprintf"}, 4}, 3},
115 // vswprintf is the wide version of
116 // vsnprintf, vsprintf has no wide version
117 {{CDM::CLibrary, {"vswscanf"}, 3}, 2}};
118
119const CallDescription ValistChecker::VaStart(CDM::CLibrary,
120 {"__builtin_va_start"}, /*Args=*/2,
121 /*Params=*/1),
122 ValistChecker::VaCopy(CDM::CLibrary, {"__builtin_va_copy"}, 2),
123 ValistChecker::VaEnd(CDM::CLibrary, {"__builtin_va_end"}, 1);
124} // end anonymous namespace
125
126void ValistChecker::checkPreCall(const CallEvent &Call,
127 CheckerContext &C) const {
128 if (VaStart.matches(Call))
129 checkVAListStartCall(Call, C, false);
130 else if (VaCopy.matches(Call))
131 checkVAListStartCall(Call, C, true);
132 else if (VaEnd.matches(Call))
133 checkVAListEndCall(Call, C);
134 else {
135 for (auto FuncInfo : VAListAccepters) {
136 if (!FuncInfo.Func.matches(Call))
137 continue;
138 bool Symbolic;
139 const MemRegion *VAList =
140 getVAListAsRegion(Call.getArgSVal(FuncInfo.VAListPos),
141 Call.getArgExpr(FuncInfo.VAListPos), Symbolic, C);
142 if (!VAList)
143 return;
144
145 if (C.getState()->contains<InitializedVALists>(VAList))
146 return;
147
148 // We did not see va_start call, but the source of the region is unknown.
149 // Be conservative and assume the best.
150 if (Symbolic)
151 return;
152
153 SmallString<80> Errmsg("Function '");
154 Errmsg += FuncInfo.Func.getFunctionName();
155 Errmsg += "' is called with an uninitialized va_list argument";
156 reportUninitializedAccess(VAList, Errmsg.c_str(), C);
157 break;
158 }
159 }
160}
161
162const MemRegion *ValistChecker::getVAListAsRegion(SVal SV, const Expr *E,
163 bool &IsSymbolic,
164 CheckerContext &C) const {
165 const MemRegion *Reg = SV.getAsRegion();
166 if (!Reg)
167 return nullptr;
168 // TODO: In the future this should be abstracted away by the analyzer.
169 bool VaListModelledAsArray = false;
170 if (const auto *Cast = dyn_cast<CastExpr>(E)) {
171 QualType Ty = Cast->getType();
172 VaListModelledAsArray =
173 Ty->isPointerType() && Ty->getPointeeType()->isRecordType();
174 }
175 if (const auto *DeclReg = Reg->getAs<DeclRegion>()) {
176 if (isa<ParmVarDecl>(DeclReg->getDecl()))
177 Reg = C.getState()->getSVal(SV.castAs<Loc>()).getAsRegion();
178 }
179 IsSymbolic = Reg && Reg->getBaseRegion()->getAs<SymbolicRegion>();
180 // Some VarRegion based VA lists reach here as ElementRegions.
181 const auto *EReg = dyn_cast_or_null<ElementRegion>(Reg);
182 return (EReg && VaListModelledAsArray) ? EReg->getSuperRegion() : Reg;
183}
184
185void ValistChecker::checkPreStmt(const VAArgExpr *VAA,
186 CheckerContext &C) const {
187 ProgramStateRef State = C.getState();
188 const Expr *VASubExpr = VAA->getSubExpr();
189 SVal VAListSVal = C.getSVal(VASubExpr);
190 bool Symbolic;
191 const MemRegion *VAList =
192 getVAListAsRegion(VAListSVal, VASubExpr, Symbolic, C);
193 if (!VAList)
194 return;
195 if (Symbolic)
196 return;
197 if (!State->contains<InitializedVALists>(VAList))
198 reportUninitializedAccess(
199 VAList, "va_arg() is called on an uninitialized va_list", C);
200}
201
202void ValistChecker::checkDeadSymbols(SymbolReaper &SR,
203 CheckerContext &C) const {
204 ProgramStateRef State = C.getState();
205 InitializedVAListsTy TrackedVALists = State->get<InitializedVALists>();
206 RegionVector LeakedVALists;
207 for (auto Reg : TrackedVALists) {
208 if (SR.isLiveRegion(Reg))
209 continue;
210 LeakedVALists.push_back(Reg);
211 State = State->remove<InitializedVALists>(Reg);
212 }
213 if (ExplodedNode *N = C.addTransition(State))
214 reportLeakedVALists(LeakedVALists, "Initialized va_list", " is leaked", C,
215 N);
216}
217
218// This function traverses the exploded graph backwards and finds the node where
219// the va_list is initialized. That node is used for uniquing the bug paths.
220// It is not likely that there are several different va_lists that belongs to
221// different stack frames, so that case is not yet handled.
222const ExplodedNode *
223ValistChecker::getStartCallSite(const ExplodedNode *N,
224 const MemRegion *Reg) const {
225 const LocationContext *LeakContext = N->getLocationContext();
226 const ExplodedNode *StartCallNode = N;
227
228 bool FoundInitializedState = false;
229
230 while (N) {
231 ProgramStateRef State = N->getState();
232 if (!State->contains<InitializedVALists>(Reg)) {
233 if (FoundInitializedState)
234 break;
235 } else {
236 FoundInitializedState = true;
237 }
238 const LocationContext *NContext = N->getLocationContext();
239 if (NContext == LeakContext || NContext->isParentOf(LeakContext))
240 StartCallNode = N;
241 N = N->pred_empty() ? nullptr : *(N->pred_begin());
242 }
243
244 return StartCallNode;
245}
246
247void ValistChecker::reportUninitializedAccess(const MemRegion *VAList,
248 StringRef Msg,
249 CheckerContext &C) const {
250 if (!ChecksEnabled[CK_Uninitialized])
251 return;
252 if (ExplodedNode *N = C.generateErrorNode()) {
253 if (!BT_uninitaccess)
254 BT_uninitaccess.reset(new BugType(CheckNames[CK_Uninitialized],
255 "Uninitialized va_list",
257 auto R = std::make_unique<PathSensitiveBugReport>(*BT_uninitaccess, Msg, N);
258 R->markInteresting(VAList);
259 R->addVisitor(std::make_unique<ValistBugVisitor>(VAList));
260 C.emitReport(std::move(R));
261 }
262}
263
264void ValistChecker::reportLeakedVALists(const RegionVector &LeakedVALists,
265 StringRef Msg1, StringRef Msg2,
267 bool ReportUninit) const {
268 if (!(ChecksEnabled[CK_Unterminated] ||
269 (ChecksEnabled[CK_Uninitialized] && ReportUninit)))
270 return;
271 for (auto Reg : LeakedVALists) {
272 if (!BT_leakedvalist) {
273 // FIXME: maybe creating a new check name for this type of bug is a better
274 // solution.
275 BT_leakedvalist.reset(
276 new BugType(CheckNames[CK_Unterminated].getName().empty()
277 ? CheckNames[CK_Uninitialized]
278 : CheckNames[CK_Unterminated],
279 "Leaked va_list", categories::MemoryError,
280 /*SuppressOnSink=*/true));
281 }
282
283 const ExplodedNode *StartNode = getStartCallSite(N, Reg);
284 PathDiagnosticLocation LocUsedForUniqueing;
285
286 if (const Stmt *StartCallStmt = StartNode->getStmtForDiagnostics())
287 LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
288 StartCallStmt, C.getSourceManager(), StartNode->getLocationContext());
289
291 llvm::raw_svector_ostream OS(Buf);
292 OS << Msg1;
293 std::string VariableName = Reg->getDescriptiveName();
294 if (!VariableName.empty())
295 OS << " " << VariableName;
296 OS << Msg2;
297
298 auto R = std::make_unique<PathSensitiveBugReport>(
299 *BT_leakedvalist, OS.str(), N, LocUsedForUniqueing,
300 StartNode->getLocationContext()->getDecl());
301 R->markInteresting(Reg);
302 R->addVisitor(std::make_unique<ValistBugVisitor>(Reg, true));
303 C.emitReport(std::move(R));
304 }
305}
306
307void ValistChecker::checkVAListStartCall(const CallEvent &Call,
308 CheckerContext &C, bool IsCopy) const {
309 bool Symbolic;
310 const MemRegion *VAList =
311 getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
312 if (!VAList)
313 return;
314
315 ProgramStateRef State = C.getState();
316
317 if (IsCopy) {
318 const MemRegion *Arg2 =
319 getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), Symbolic, C);
320 if (Arg2) {
321 if (ChecksEnabled[CK_CopyToSelf] && VAList == Arg2) {
322 RegionVector LeakedVALists{VAList};
323 if (ExplodedNode *N = C.addTransition(State))
324 reportLeakedVALists(LeakedVALists, "va_list",
325 " is copied onto itself", C, N, true);
326 return;
327 } else if (!State->contains<InitializedVALists>(Arg2) && !Symbolic) {
328 if (State->contains<InitializedVALists>(VAList)) {
329 State = State->remove<InitializedVALists>(VAList);
330 RegionVector LeakedVALists{VAList};
331 if (ExplodedNode *N = C.addTransition(State))
332 reportLeakedVALists(LeakedVALists, "Initialized va_list",
333 " is overwritten by an uninitialized one", C, N,
334 true);
335 } else {
336 reportUninitializedAccess(Arg2, "Uninitialized va_list is copied", C);
337 }
338 return;
339 }
340 }
341 }
342 if (State->contains<InitializedVALists>(VAList)) {
343 RegionVector LeakedVALists{VAList};
344 if (ExplodedNode *N = C.addTransition(State))
345 reportLeakedVALists(LeakedVALists, "Initialized va_list",
346 " is initialized again", C, N);
347 return;
348 }
349
350 State = State->add<InitializedVALists>(VAList);
351 C.addTransition(State);
352}
353
354void ValistChecker::checkVAListEndCall(const CallEvent &Call,
355 CheckerContext &C) const {
356 bool Symbolic;
357 const MemRegion *VAList =
358 getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
359 if (!VAList)
360 return;
361
362 // We did not see va_start call, but the source of the region is unknown.
363 // Be conservative and assume the best.
364 if (Symbolic)
365 return;
366
367 if (!C.getState()->contains<InitializedVALists>(VAList)) {
368 reportUninitializedAccess(
369 VAList, "va_end() is called on an uninitialized va_list", C);
370 return;
371 }
372 ProgramStateRef State = C.getState();
373 State = State->remove<InitializedVALists>(VAList);
374 C.addTransition(State);
375}
376
377PathDiagnosticPieceRef ValistChecker::ValistBugVisitor::VisitNode(
379 ProgramStateRef State = N->getState();
380 ProgramStateRef StatePrev = N->getFirstPred()->getState();
381
382 const Stmt *S = N->getStmtForDiagnostics();
383 if (!S)
384 return nullptr;
385
386 StringRef Msg;
387 if (State->contains<InitializedVALists>(Reg) &&
388 !StatePrev->contains<InitializedVALists>(Reg))
389 Msg = "Initialized va_list";
390 else if (!State->contains<InitializedVALists>(Reg) &&
391 StatePrev->contains<InitializedVALists>(Reg))
392 Msg = "Ended va_list";
393
394 if (Msg.empty())
395 return nullptr;
396
398 N->getLocationContext());
399 return std::make_shared<PathDiagnosticEventPiece>(Pos, Msg, true);
400}
401
402void ento::registerValistBase(CheckerManager &mgr) {
403 mgr.registerChecker<ValistChecker>();
404}
405
406bool ento::shouldRegisterValistBase(const CheckerManager &mgr) {
407 return true;
408}
409
410#define REGISTER_CHECKER(name) \
411 void ento::register##name##Checker(CheckerManager &mgr) { \
412 ValistChecker *checker = mgr.getChecker<ValistChecker>(); \
413 checker->ChecksEnabled[ValistChecker::CK_##name] = true; \
414 checker->CheckNames[ValistChecker::CK_##name] = \
415 mgr.getCurrentCheckerName(); \
416 } \
417 \
418 bool ento::shouldRegister##name##Checker(const CheckerManager &mgr) { \
419 return true; \
420 }
421
423REGISTER_CHECKER(Unterminated)
424REGISTER_CHECKER(CopyToSelf)
Expr * E
#define X(type, name)
Definition: Value.h:143
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
static std::string getName(const CallEvent &Call)
@ Uninitialized
#define REGISTER_CHECKER(name)
This represents one expression.
Definition: Expr.h:110
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
bool isParentOf(const LocationContext *LC) const
const Decl * getDecl() const
A (possibly-)qualified type.
Definition: Type.h:941
Stmt - This represents one statement.
Definition: Stmt.h:84
bool isPointerType() const
Definition: Type.h:8003
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isRecordType() const
Definition: Type.h:8103
Represents a call to the builtin function __builtin_va_arg.
Definition: Expr.h:4701
const Expr * getSubExpr() const
Definition: Expr.h:4717
StringRef getDescription() const
A verbose warning message that is appropriate for displaying next to the source code that introduces ...
Definition: BugReporter.h:157
const SourceManager & getSourceManager() const
Definition: BugReporter.h:737
BugReporterVisitors are used to add custom diagnostics along a path.
A CallDescription is a pattern that can be used to match calls based on the qualified name and the ar...
bool matches(const CallEvent &Call) const
Returns true if the CallEvent is a call to a function that matches the CallDescription.
Represents an abstract call to a function or method along a particular path.
Definition: CallEvent.h:153
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
This wrapper is used to ensure that only StringRefs originating from the CheckerRegistry are used as ...
const ProgramStateRef & getState() const
pred_iterator pred_begin()
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
const LocationContext * getLocationContext() const
ExplodedNode * getFirstPred()
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
std::string getDescriptiveName(bool UseQuotes=true) const
Get descriptive name for memory region.
Definition: MemRegion.cpp:718
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1354
const RegionTy * getAs() const
Definition: MemRegion.h:1388
static PathDiagnosticLocation createBegin(const Decl *D, const SourceManager &SM)
Create a location for the beginning of the declaration.
PathDiagnosticLocation getLocation() const override
The primary location of the bug report that points at the undesirable behavior in the code.
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:82
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition: MemRegion.h:459
A class responsible for cleaning up unused symbols.
bool isLiveRegion(const MemRegion *region)
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:780
std::shared_ptr< PathDiagnosticPiece > PathDiagnosticPieceRef
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2052
The JSON file list parser is used to communicate input to InstallAPI.