clang 20.0.0git
ExplodedGraph.cpp
Go to the documentation of this file.
1//===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the template classes ExplodedNode and ExplodedGraph,
10// which represent a path-sensitive, intra-procedural "exploded graph."
11//
12//===----------------------------------------------------------------------===//
13
15#include "clang/AST/Expr.h"
16#include "clang/AST/ExprObjC.h"
17#include "clang/AST/ParentMap.h"
18#include "clang/AST/Stmt.h"
22#include "clang/Basic/LLVM.h"
26#include "llvm/ADT/DenseSet.h"
27#include "llvm/ADT/FoldingSet.h"
28#include "llvm/ADT/PointerUnion.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/Support/Casting.h"
31#include <cassert>
32#include <memory>
33#include <optional>
34
35using namespace clang;
36using namespace ento;
37
38//===----------------------------------------------------------------------===//
39// Cleanup.
40//===----------------------------------------------------------------------===//
41
43
45
46//===----------------------------------------------------------------------===//
47// Node reclamation.
48//===----------------------------------------------------------------------===//
49
51 if (!Ex->isLValue())
52 return false;
53 return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Ex);
54}
55
56bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
57 // First, we only consider nodes for reclamation of the following
58 // conditions apply:
59 //
60 // (1) 1 predecessor (that has one successor)
61 // (2) 1 successor (that has one predecessor)
62 //
63 // If a node has no successor it is on the "frontier", while a node
64 // with no predecessor is a root.
65 //
66 // After these prerequisites, we discard all "filler" nodes that
67 // are used only for intermediate processing, and are not essential
68 // for analyzer history:
69 //
70 // (a) PreStmtPurgeDeadSymbols
71 //
72 // We then discard all other nodes where *all* of the following conditions
73 // apply:
74 //
75 // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
76 // (4) There is no 'tag' for the ProgramPoint.
77 // (5) The 'store' is the same as the predecessor.
78 // (6) The 'GDM' is the same as the predecessor.
79 // (7) The LocationContext is the same as the predecessor.
80 // (8) Expressions that are *not* lvalue expressions.
81 // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
82 // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
83 // PreImplicitCall (so that we would be able to find it when retrying a
84 // call with no inlining).
85 // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
86
87 // Conditions 1 and 2.
88 if (node->pred_size() != 1 || node->succ_size() != 1)
89 return false;
90
91 const ExplodedNode *pred = *(node->pred_begin());
92 if (pred->succ_size() != 1)
93 return false;
94
95 const ExplodedNode *succ = *(node->succ_begin());
96 if (succ->pred_size() != 1)
97 return false;
98
99 // Now reclaim any nodes that are (by definition) not essential to
100 // analysis history and are not consulted by any client code.
101 ProgramPoint progPoint = node->getLocation();
102 if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
103 return !progPoint.getTag();
104
105 // Condition 3.
106 if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
107 return false;
108
109 // Condition 4.
110 if (progPoint.getTag())
111 return false;
112
113 // Conditions 5, 6, and 7.
114 ProgramStateRef state = node->getState();
115 ProgramStateRef pred_state = pred->getState();
116 if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
117 progPoint.getLocationContext() != pred->getLocationContext())
118 return false;
119
120 // All further checks require expressions. As per #3, we know that we have
121 // a PostStmt.
122 const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
123 if (!Ex)
124 return false;
125
126 // Condition 8.
127 // Do not collect nodes for "interesting" lvalue expressions since they are
128 // used extensively for generating path diagnostics.
130 return false;
131
132 // Condition 9.
133 // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
134 // diagnostic generation; specifically, so that we could anchor arrows
135 // pointing to the beginning of statements (as written in code).
136 const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
137 if (!PM.isConsumedExpr(Ex))
138 return false;
139
140 // Condition 10.
141 const ProgramPoint SuccLoc = succ->getLocation();
142 if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
143 if (CallEvent::isCallStmt(SP->getStmt()))
144 return false;
145
146 // Condition 10, continuation.
147 if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
148 return false;
149
150 return true;
151}
152
153void ExplodedGraph::collectNode(ExplodedNode *node) {
154 // Removing a node means:
155 // (a) changing the predecessors successor to the successor of this node
156 // (b) changing the successors predecessor to the predecessor of this node
157 // (c) Putting 'node' onto freeNodes.
158 assert(node->pred_size() == 1 || node->succ_size() == 1);
159 ExplodedNode *pred = *(node->pred_begin());
160 ExplodedNode *succ = *(node->succ_begin());
161 pred->replaceSuccessor(succ);
162 succ->replacePredecessor(pred);
163 FreeNodes.push_back(node);
164 Nodes.RemoveNode(node);
165 --NumNodes;
166 node->~ExplodedNode();
167}
168
170 if (ChangedNodes.empty())
171 return;
172
173 // Only periodically reclaim nodes so that we can build up a set of
174 // nodes that meet the reclamation criteria. Freshly created nodes
175 // by definition have no successor, and thus cannot be reclaimed (see below).
176 assert(ReclaimCounter > 0);
177 if (--ReclaimCounter != 0)
178 return;
180
181 for (const auto node : ChangedNodes)
182 if (shouldCollect(node))
183 collectNode(node);
184 ChangedNodes.clear();
185}
186
187//===----------------------------------------------------------------------===//
188// ExplodedNode.
189//===----------------------------------------------------------------------===//
190
191// An NodeGroup's storage type is actually very much like a TinyPtrVector:
192// it can be either a pointer to a single ExplodedNode, or a pointer to a
193// BumpVector allocated with the ExplodedGraph's allocator. This allows the
194// common case of single-node NodeGroups to be implemented with no extra memory.
195//
196// Consequently, each of the NodeGroup methods have up to four cases to handle:
197// 1. The flag is set and this group does not actually contain any nodes.
198// 2. The group is empty, in which case the storage value is null.
199// 3. The group contains a single node.
200// 4. The group contains more than one node.
202using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
203
205 assert(!V->isSink());
206 Preds.addNode(V, G);
207 V->Succs.addNode(this, G);
208}
209
210void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
211 assert(!getFlag());
212
213 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
214 assert(isa<ExplodedNode *>(Storage));
215 Storage = node;
216 assert(isa<ExplodedNode *>(Storage));
217}
218
219void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
220 assert(!getFlag());
221
222 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
223 if (Storage.isNull()) {
224 Storage = N;
225 assert(isa<ExplodedNode *>(Storage));
226 return;
227 }
228
229 ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>();
230
231 if (!V) {
232 // Switch from single-node to multi-node representation.
233 auto *Old = cast<ExplodedNode *>(Storage);
234
236 V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4);
237 V->push_back(Old, Ctx);
238
239 Storage = V;
240 assert(!getFlag());
241 assert(isa<ExplodedNodeVector *>(Storage));
242 }
243
244 V->push_back(N, G.getNodeAllocator());
245}
246
247unsigned ExplodedNode::NodeGroup::size() const {
248 if (getFlag())
249 return 0;
250
251 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
252 if (Storage.isNull())
253 return 0;
254 if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
255 return V->size();
256 return 1;
257}
258
259ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
260 if (getFlag())
261 return nullptr;
262
263 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
264 if (Storage.isNull())
265 return nullptr;
266 if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
267 return V->begin();
268 return Storage.getAddrOfPtr1();
269}
270
271ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
272 if (getFlag())
273 return nullptr;
274
275 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
276 if (Storage.isNull())
277 return nullptr;
278 if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
279 return V->end();
280 return Storage.getAddrOfPtr1() + 1;
281}
282
284 return pred_size() == 1 && succ_size() == 1 &&
285 getFirstPred()->getState()->getID() == getState()->getID() &&
286 getFirstPred()->succ_size() == 1;
287}
288
291 if (auto BEP = P.getAs<BlockEntrance>())
292 return BEP->getBlock();
293
294 // Find the node's current statement in the CFG.
295 // FIXME: getStmtForDiagnostics() does nasty things in order to provide
296 // a valid statement for body farms, do we need this behavior here?
297 if (const Stmt *S = getStmtForDiagnostics())
298 return getLocationContext()
300 ->getCFGStmtMap()
301 ->getBlock(S);
302
303 return nullptr;
304}
305
306static const LocationContext *
309 const LocationContext *ParentLC = LC->getParent();
310 assert(ParentLC && "We don't start analysis from autosynthesized code");
311 while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
312 LC = ParentLC;
313 ParentLC = LC->getParent();
314 assert(ParentLC && "We don't start analysis from autosynthesized code");
315 }
316 return LC;
317}
318
320 // We cannot place diagnostics on autosynthesized code.
321 // Put them onto the call site through which we jumped into autosynthesized
322 // code for the first time.
325 // It must be a stack frame because we only autosynthesize functions.
326 return cast<StackFrameContext>(findTopAutosynthesizedParentContext(LC))
327 ->getCallSite();
328 }
329 // Otherwise, see if the node's program point directly points to a statement.
330 // FIXME: Refactor into a ProgramPoint method?
332 if (auto SP = P.getAs<StmtPoint>())
333 return SP->getStmt();
334 if (auto BE = P.getAs<BlockEdge>())
335 return BE->getSrc()->getTerminatorStmt();
336 if (auto CE = P.getAs<CallEnter>())
337 return CE->getCallExpr();
338 if (auto CEE = P.getAs<CallExitEnd>())
339 return CEE->getCalleeContext()->getCallSite();
340 if (auto PIPP = P.getAs<PostInitializer>())
341 return PIPP->getInitializer()->getInit();
342 if (auto CEB = P.getAs<CallExitBegin>())
343 return CEB->getReturnStmt();
344 if (auto FEP = P.getAs<FunctionExitPoint>())
345 return FEP->getStmt();
346
347 return nullptr;
348}
349
351 for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
352 if (N->getLocation().isPurgeKind())
353 continue;
354 if (const Stmt *S = N->getStmtForDiagnostics()) {
355 // Check if the statement is '?' or '&&'/'||'. These are "merges",
356 // not actual statement points.
357 switch (S->getStmtClass()) {
358 case Stmt::ChooseExprClass:
359 case Stmt::BinaryConditionalOperatorClass:
360 case Stmt::ConditionalOperatorClass:
361 continue;
362 case Stmt::BinaryOperatorClass: {
363 BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
364 if (Op == BO_LAnd || Op == BO_LOr)
365 continue;
366 break;
367 }
368 default:
369 break;
370 }
371 // We found the statement, so return it.
372 return S;
373 }
374 }
375
376 return nullptr;
377}
378
380 for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
381 if (const Stmt *S = N->getStmtForDiagnostics(); S && !isa<CompoundStmt>(S))
382 return S;
383
384 return nullptr;
385}
386
388 if (const Stmt *S = getStmtForDiagnostics())
389 return S;
390
392}
393
395 ProgramStateRef State,
396 bool IsSink,
397 bool* IsNew) {
398 // Profile 'State' to determine if we already have an existing node.
399 llvm::FoldingSetNodeID profile;
400 void *InsertPos = nullptr;
401
402 NodeTy::Profile(profile, L, State, IsSink);
403 NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
404
405 if (!V) {
406 if (!FreeNodes.empty()) {
407 V = FreeNodes.back();
408 FreeNodes.pop_back();
409 }
410 else {
411 // Allocate a new node.
412 V = getAllocator().Allocate<NodeTy>();
413 }
414
415 ++NumNodes;
416 new (V) NodeTy(L, State, NumNodes, IsSink);
417
419 ChangedNodes.push_back(V);
420
421 // Insert the node into the node set and return it.
422 Nodes.InsertNode(V, InsertPos);
423
424 if (IsNew) *IsNew = true;
425 }
426 else
427 if (IsNew) *IsNew = false;
428
429 return V;
430}
431
433 ProgramStateRef State,
434 int64_t Id,
435 bool IsSink) {
436 NodeTy *V = getAllocator().Allocate<NodeTy>();
437 new (V) NodeTy(L, State, Id, IsSink);
438 return V;
439}
440
441std::unique_ptr<ExplodedGraph>
443 InterExplodedGraphMap *ForwardMap,
444 InterExplodedGraphMap *InverseMap) const {
445 if (Nodes.empty())
446 return nullptr;
447
448 using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
449 Pass1Ty Pass1;
450
451 using Pass2Ty = InterExplodedGraphMap;
452 InterExplodedGraphMap Pass2Scratch;
453 Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
454
456
457 // ===- Pass 1 (reverse DFS) -===
458 for (const auto Sink : Sinks)
459 if (Sink)
460 WL1.push_back(Sink);
461
462 // Process the first worklist until it is empty.
463 while (!WL1.empty()) {
464 const ExplodedNode *N = WL1.pop_back_val();
465
466 // Have we already visited this node? If so, continue to the next one.
467 if (!Pass1.insert(N).second)
468 continue;
469
470 // If this is a root enqueue it to the second worklist.
471 if (N->Preds.empty()) {
472 WL2.push_back(N);
473 continue;
474 }
475
476 // Visit our predecessors and enqueue them.
477 WL1.append(N->Preds.begin(), N->Preds.end());
478 }
479
480 // We didn't hit a root? Return with a null pointer for the new graph.
481 if (WL2.empty())
482 return nullptr;
483
484 // Create an empty graph.
485 std::unique_ptr<ExplodedGraph> G = MakeEmptyGraph();
486
487 // ===- Pass 2 (forward DFS to construct the new graph) -===
488 while (!WL2.empty()) {
489 const ExplodedNode *N = WL2.pop_back_val();
490
491 // Skip this node if we have already processed it.
492 if (Pass2.contains(N))
493 continue;
494
495 // Create the corresponding node in the new graph and record the mapping
496 // from the old node to the new node.
497 ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
498 N->getID(), N->isSink());
499 Pass2[N] = NewN;
500
501 // Also record the reverse mapping from the new node to the old node.
502 if (InverseMap) (*InverseMap)[NewN] = N;
503
504 // If this node is a root, designate it as such in the graph.
505 if (N->Preds.empty())
506 G->addRoot(NewN);
507
508 // In the case that some of the intended predecessors of NewN have already
509 // been created, we should hook them up as predecessors.
510
511 // Walk through the predecessors of 'N' and hook up their corresponding
512 // nodes in the new graph (if any) to the freshly created node.
513 for (const ExplodedNode *Pred : N->Preds) {
514 Pass2Ty::iterator PI = Pass2.find(Pred);
515 if (PI == Pass2.end())
516 continue;
517
518 NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
519 }
520
521 // In the case that some of the intended successors of NewN have already
522 // been created, we should hook them up as successors. Otherwise, enqueue
523 // the new nodes from the original graph that should have nodes created
524 // in the new graph.
525 for (const ExplodedNode *Succ : N->Succs) {
526 Pass2Ty::iterator PI = Pass2.find(Succ);
527 if (PI != Pass2.end()) {
528 const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
529 continue;
530 }
531
532 // Enqueue nodes to the worklist that were marked during pass 1.
533 if (Pass1.count(Succ))
534 WL2.push_back(Succ);
535 }
536 }
537
538 return G;
539}
#define V(N, I)
Definition: ASTContext.h:3443
StringRef P
llvm::PointerUnion< ExplodedNode *, ExplodedNodeVector * > GroupStorage
BumpVector< ExplodedNode * > ExplodedNodeVector
static const LocationContext * findTopAutosynthesizedParentContext(const LocationContext *LC)
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
uint32_t Id
Definition: SemaARM.cpp:1134
Represents a single basic block in a source-level CFG.
Definition: CFG.h:604
CFGBlock * getBlock(Stmt *S)
Returns the CFGBlock the specified Stmt* appears in.
Definition: CFGStmtMap.cpp:27
Represents a point when we begin processing an inlined call.
Definition: ProgramPoint.h:628
Represents a point when we start the call exit sequence (for inlined call).
Definition: ProgramPoint.h:666
Represents a point when we finish the call exit sequence (for inlined call).
Definition: ProgramPoint.h:686
This represents one expression.
Definition: Expr.h:110
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Definition: Expr.h:277
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
const ParentMap & getParentMap() const
LLVM_ATTRIBUTE_RETURNS_NONNULL AnalysisDeclContext * getAnalysisDeclContext() const
const LocationContext * getParent() const
It might return null.
bool isConsumedExpr(Expr *E) const
Definition: ParentMap.cpp:174
Represents a program point after a store evaluation.
Definition: ProgramPoint.h:426
Represents a program point just before an implicit call event.
Definition: ProgramPoint.h:579
Represents a point after we ran remove dead bindings BEFORE processing the given statement.
Definition: ProgramPoint.h:468
const ProgramPointTag * getTag() const
Definition: ProgramPoint.h:173
bool isPurgeKind()
Is this a program point corresponding to purge/removal of dead symbols and bindings.
Definition: ProgramPoint.h:167
T castAs() const
Convert to the specified ProgramPoint type, asserting that this ProgramPoint is of the desired type.
Definition: ProgramPoint.h:137
std::optional< T > getAs() const
Convert to the specified ProgramPoint type, returning std::nullopt if this ProgramPoint is not of the...
Definition: ProgramPoint.h:147
const LocationContext * getLocationContext() const
Definition: ProgramPoint.h:175
const Stmt * getStmt() const
Definition: ProgramPoint.h:274
Stmt - This represents one statement.
Definition: Stmt.h:84
static bool isCallStmt(const Stmt *S)
Returns true if this is a statement is a function or method call of some kind.
Definition: CallEvent.cpp:348
std::unique_ptr< ExplodedGraph > trim(ArrayRef< const NodeTy * > Nodes, InterExplodedGraphMap *ForwardMap=nullptr, InterExplodedGraphMap *InverseMap=nullptr) const
Creates a trimmed version of the graph that only contains paths leading to the given nodes.
BumpVectorContext & getNodeAllocator()
unsigned ReclaimCounter
Counter to determine when to reclaim nodes.
NodeVector ChangedNodes
A list of recently allocated nodes that can potentially be recycled.
int64_t NumNodes
NumNodes - The number of nodes in the graph.
unsigned ReclaimNodeInterval
Determines how often nodes are reclaimed.
void reclaimRecentlyAllocatedNodes()
Reclaim "uninteresting" nodes created since the last time this method was called.
static bool isInterestingLValueExpr(const Expr *Ex)
Returns true if nodes for the given expression kind are always kept around.
llvm::BumpPtrAllocator & getAllocator()
std::unique_ptr< ExplodedGraph > MakeEmptyGraph() const
ExplodedNode * getNode(const ProgramPoint &L, ProgramStateRef State, bool IsSink=false, bool *IsNew=nullptr)
Retrieve the node associated with a (Location,State) pair, where the 'Location' is a ProgramPoint in ...
ExplodedNode * createUncachedNode(const ProgramPoint &L, ProgramStateRef State, int64_t Id, bool IsSink=false)
Create a node for a (Location, State) pair, but don't store it for deduplication later.
llvm::FoldingSet< ExplodedNode > Nodes
Nodes - The nodes in the graph.
NodeVector FreeNodes
A list of nodes that can be reused.
const CFGBlock * getCFGBlock() const
const ProgramStateRef & getState() const
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
bool isTrivial() const
The node is trivial if it has only one successor, only one predecessor, it's predecessor has only one...
const Stmt * getPreviousStmtForDiagnostics() const
Find the statement that was executed immediately before this node.
ProgramPoint getLocation() const
getLocation - Returns the edge associated with the given node.
void addPredecessor(ExplodedNode *V, ExplodedGraph &G)
addPredeccessor - Adds a predecessor to the current node, and in tandem add this node as a successor ...
ExplodedNode * getFirstSucc()
unsigned pred_size() const
static void Profile(llvm::FoldingSetNodeID &ID, const ProgramPoint &Loc, const ProgramStateRef &state, bool IsSink)
const Stmt * getNextStmtForDiagnostics() const
Find the next statement that was executed on this node's execution path.
const LocationContext * getLocationContext() const
const Stmt * getCurrentOrPreviousStmtForDiagnostics() const
Find the statement that was executed at or immediately before this node.
ExplodedNode * getFirstPred()
unsigned succ_size() const
llvm::DenseMap< const ExplodedNode *, const ExplodedNode * > InterExplodedGraphMap
The JSON file list parser is used to communicate input to InstallAPI.
BinaryOperatorKind