clang 20.0.0git
DataflowAnalysisContext.h
Go to the documentation of this file.
1//===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17
18#include "clang/AST/Decl.h"
19#include "clang/AST/Expr.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/DenseSet.h"
29#include "llvm/ADT/SetVector.h"
30#include "llvm/Support/Compiler.h"
31#include <cassert>
32#include <memory>
33#include <optional>
34
35namespace clang {
36namespace dataflow {
37class Logger;
38
40 /// The maximum depth to analyze. A value of zero is equivalent to disabling
41 /// context-sensitive analysis entirely.
42 unsigned Depth = 2;
43};
44
45/// Owns objects that encompass the state of a program and stores context that
46/// is used during dataflow analysis.
48public:
49 struct Options {
50 /// Options for analyzing function bodies when present in the translation
51 /// unit, or empty to disable context-sensitive analysis. Note that this is
52 /// fundamentally limited: some constructs, such as recursion, are
53 /// explicitly unsupported.
54 std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
55
56 /// If provided, analysis details will be recorded here.
57 /// (This is always non-null within an AnalysisContext, the framework
58 /// provides a fallback no-op logger).
59 Logger *Log = nullptr;
60 };
61
62 /// Constructs a dataflow analysis context.
63 ///
64 /// Requirements:
65 ///
66 /// `S` must not be null.
67 DataflowAnalysisContext(std::unique_ptr<Solver> S,
68 Options Opts = Options{
69 /*ContextSensitiveOpts=*/std::nullopt,
70 /*Logger=*/nullptr})
71 : DataflowAnalysisContext(*S, std::move(S), Opts) {}
72
73 /// Constructs a dataflow analysis context.
74 ///
75 /// Requirements:
76 ///
77 /// `S` must outlive the `DataflowAnalysisContext`.
79 /*ContextSensitiveOpts=*/std::nullopt,
80 /*Logger=*/nullptr})
81 : DataflowAnalysisContext(S, nullptr, Opts) {}
82
84
85 /// Sets a callback that returns the names and types of the synthetic fields
86 /// to add to a `RecordStorageLocation` of a given type.
87 /// Typically, this is called from the constructor of a `DataflowAnalysis`
88 ///
89 /// The field types returned by the callback may not have reference type.
90 ///
91 /// To maintain the invariant that all `RecordStorageLocation`s of a given
92 /// type have the same fields:
93 /// * The callback must always return the same result for a given type
94 /// * `setSyntheticFieldCallback()` must be called before any
95 // `RecordStorageLocation`s are created.
97 std::function<llvm::StringMap<QualType>(QualType)> CB) {
98 assert(!RecordStorageLocationCreated);
99 SyntheticFieldCallback = CB;
100 }
101
102 /// Returns a new storage location appropriate for `Type`.
103 ///
104 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
106
107 /// Creates a `RecordStorageLocation` for the given type and with the given
108 /// fields.
109 ///
110 /// Requirements:
111 ///
112 /// `FieldLocs` must contain exactly the fields returned by
113 /// `getModeledFields(Type)`.
114 /// `SyntheticFields` must contain exactly the fields returned by
115 /// `getSyntheticFields(Type)`.
119
120 /// Returns a stable storage location for `D`.
122
123 /// Returns a stable storage location for `E`.
125
126 /// Returns a pointer value that represents a null pointer. Calls with
127 /// `PointeeType` that are canonically equivalent will return the same result.
128 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
130
131 /// Adds `Constraint` to current and future flow conditions in this context.
132 ///
133 /// Invariants must contain only flow-insensitive information, i.e. facts that
134 /// are true on all paths through the program.
135 /// Information can be added eagerly (when analysis begins), or lazily (e.g.
136 /// when values are first used). The analysis must be careful that the same
137 /// information is added regardless of which order blocks are analyzed in.
138 void addInvariant(const Formula &Constraint);
139
140 /// Adds `Constraint` to the flow condition identified by `Token`.
141 void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
142
143 /// Creates a new flow condition with the same constraints as the flow
144 /// condition identified by `Token` and returns its token.
146
147 /// Creates a new flow condition that represents the disjunction of the flow
148 /// conditions identified by `FirstToken` and `SecondToken`, and returns its
149 /// token.
150 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
151
152 /// Returns true if the constraints of the flow condition identified by
153 /// `Token` imply that `F` is true.
154 /// Returns false if the flow condition does not imply `F` or if the solver
155 /// times out.
156 bool flowConditionImplies(Atom Token, const Formula &F);
157
158 /// Returns true if the constraints of the flow condition identified by
159 /// `Token` still allow `F` to be true.
160 /// Returns false if the flow condition implies that `F` is false or if the
161 /// solver times out.
162 bool flowConditionAllows(Atom Token, const Formula &F);
163
164 /// Returns true if `Val1` is equivalent to `Val2`.
165 /// Note: This function doesn't take into account constraints on `Val1` and
166 /// `Val2` imposed by the flow condition.
167 bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
168
169 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
170 llvm::raw_ostream &OS = llvm::dbgs());
171
172 /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise,
173 /// returns null.
174 const AdornedCFG *getAdornedCFG(const FunctionDecl *F);
175
176 const Options &getOptions() { return Opts; }
177
178 Arena &arena() { return *A; }
179
180 /// Returns the outcome of satisfiability checking on `Constraints`.
181 ///
182 /// Flow conditions are not incorporated, so they may need to be manually
183 /// included in `Constraints` to provide contextually-accurate results, e.g.
184 /// if any definitions or relationships of the values in `Constraints` have
185 /// been stored in flow conditions.
186 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
187
188 /// Returns the fields of `Type`, limited to the set of fields modeled by this
189 /// context.
191
192 /// Returns the names and types of the synthetic fields for the given record
193 /// type.
194 llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
195 assert(Type->isRecordType());
196 if (SyntheticFieldCallback) {
197 llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type);
198 // Synthetic fields are not allowed to have reference type.
199 assert([&Result] {
200 for (const auto &Entry : Result)
201 if (Entry.getValue()->isReferenceType())
202 return false;
203 return true;
204 }());
205 return Result;
206 }
207 return {};
208 }
209
210private:
211 friend class Environment;
212
213 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
214 static QualType getEmptyKey() {
215 // Allow a NULL `QualType` by using a different value as the empty key.
216 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
217 }
218
219 using DenseMapInfo::getHashValue;
220 using DenseMapInfo::getTombstoneKey;
221 using DenseMapInfo::isEqual;
222 };
223
224 /// `S` is the solver to use. `OwnedSolver` may be:
225 /// * Null (in which case `S` is non-onwed and must outlive this object), or
226 /// * Non-null (in which case it must refer to `S`, and the
227 /// `DataflowAnalysisContext will take ownership of `OwnedSolver`).
228 DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver,
229 Options Opts);
230
231 // Extends the set of modeled field declarations.
232 void addModeledFields(const FieldSet &Fields);
233
234 /// Adds all constraints of the flow condition identified by `Token` and all
235 /// of its transitive dependencies to `Constraints`.
236 void
237 addTransitiveFlowConditionConstraints(Atom Token,
238 llvm::SetVector<const Formula *> &Out);
239
240 /// Returns true if the solver is able to prove that there is a satisfying
241 /// assignment for `Constraints`.
242 bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
243 return querySolver(std::move(Constraints)).getStatus() ==
245 }
246
247 /// Returns true if the solver is able to prove that there is no satisfying
248 /// assignment for `Constraints`
249 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
250 return querySolver(std::move(Constraints)).getStatus() ==
252 }
253
254 Solver &S;
255 std::unique_ptr<Solver> OwnedSolver;
256 std::unique_ptr<Arena> A;
257
258 // Maps from program declarations and statements to storage locations that are
259 // assigned to them. These assignments are global (aggregated across all basic
260 // blocks) and are used to produce stable storage locations when the same
261 // basic blocks are evaluated multiple times. The storage locations that are
262 // in scope for a particular basic block are stored in `Environment`.
263 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
264 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
265
266 // Null pointer values, keyed by the canonical pointee type.
267 //
268 // FIXME: The pointer values are indexed by the pointee types which are
269 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
270 // creating a type-independent `NullPointerValue` without a `PointeeLoc`
271 // field.
272 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
273 NullPointerVals;
274
275 Options Opts;
276
277 // Flow conditions are tracked symbolically: each unique flow condition is
278 // associated with a fresh symbolic variable (token), bound to the clause that
279 // defines the flow condition. Conceptually, each binding corresponds to an
280 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
281 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
282 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
283 // the `FlowConditionConstraints` map, keyed by the token of the flow
284 // condition.
285 //
286 // Flow conditions depend on other flow conditions if they are created using
287 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
288 // dependencies is stored in the `FlowConditionDeps` map.
289 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
290 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
291 const Formula *Invariant = nullptr;
292
293 llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts;
294
295 // Fields modeled by environments covered by this context.
296 FieldSet ModeledFields;
297
298 std::unique_ptr<Logger> LogOwner; // If created via flags.
299
300 std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
301
302 /// Has any `RecordStorageLocation` been created yet?
303 bool RecordStorageLocationCreated = false;
304};
305
306} // namespace dataflow
307} // namespace clang
308
309#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
const Decl * D
Expr * E
Allows QualTypes to be sorted and hence used in maps and sets.
This represents one expression.
Definition: Expr.h:110
Represents a function declaration or definition.
Definition: Decl.h:1932
A (possibly-)qualified type.
Definition: Type.h:941
static QualType getFromOpaquePtr(const void *Ptr)
Definition: Type.h:990
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1829
bool isRecordType() const
Definition: Type.h:8103
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:667
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:47
The Arena owns the objects that model data within an analysis.
Definition: Arena.h:21
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
void setSyntheticFieldCallback(std::function< llvm::StringMap< QualType >(QualType)> CB)
Sets a callback that returns the names and types of the synthetic fields to add to a RecordStorageLoc...
const AdornedCFG * getAdornedCFG(const FunctionDecl *F)
Returns the AdornedCFG registered for F, if any.
DataflowAnalysisContext(std::unique_ptr< Solver > S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
Atom joinFlowConditions(Atom FirstToken, Atom SecondToken)
Creates a new flow condition that represents the disjunction of the flow conditions identified by Fir...
void addFlowConditionConstraint(Atom Token, const Formula &Constraint)
Adds Constraint to the flow condition identified by Token.
Atom forkFlowCondition(Atom Token)
Creates a new flow condition with the same constraints as the flow condition identified by Token and ...
bool equivalentFormulas(const Formula &Val1, const Formula &Val2)
Returns true if Val1 is equivalent to Val2.
StorageLocation & getStableStorageLocation(const ValueDecl &D)
Returns a stable storage location for D.
bool flowConditionImplies(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token imply that F is true.
Solver::Result querySolver(llvm::SetVector< const Formula * > Constraints)
Returns the outcome of satisfiability checking on Constraints.
bool flowConditionAllows(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token still allow F to be true.
PointerValue & getOrCreateNullPointerValue(QualType PointeeType)
Returns a pointer value that represents a null pointer.
void addInvariant(const Formula &Constraint)
Adds Constraint to current and future flow conditions in this context.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, llvm::raw_ostream &OS=llvm::dbgs())
DataflowAnalysisContext(Solver &S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
Holds the state of the program (store and heap) at a given program point.
A logger is notified as the analysis progresses.
Definition: Logger.h:27
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
An interface for a SAT solver that can be used by dataflow analyses.
Definition: Solver.h:28
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
llvm::SmallSetVector< const FieldDecl *, 4 > FieldSet
A set of FieldDecl *.
Definition: ASTOps.h:42
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
unsigned Depth
The maximum depth to analyze.
Logger * Log
If provided, analysis details will be recorded here.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Status getStatus() const
Returns the status of satisfiability checking on the queried boolean formula.
Definition: Solver.h:64
@ Unsatisfiable
Indicates that there is no satisfying assignment for a boolean formula.
@ Satisfiable
Indicates that there exists a satisfying assignment for a boolean formula.