clang 20.0.0git
Taint.cpp
Go to the documentation of this file.
1//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines basic, non-domain-specific mechanisms for tracking tainted values.
10//
11//===----------------------------------------------------------------------===//
12
17#include <optional>
18
19using namespace clang;
20using namespace ento;
21using namespace taint;
22
23// Fully tainted symbols.
25
26// Partially tainted symbols.
29REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
30
31void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
32 const char *Sep) {
33 TaintMapTy TM = State->get<TaintMap>();
34
35 if (!TM.isEmpty())
36 Out << "Tainted symbols:" << NL;
37
38 for (const auto &I : TM)
39 Out << I.first << " : " << I.second << NL;
40}
41
43 printTaint(State, llvm::errs());
44}
45
47 const LocationContext *LCtx,
48 TaintTagType Kind) {
49 return addTaint(State, State->getSVal(S, LCtx), Kind);
50}
51
53 TaintTagType Kind) {
54 SymbolRef Sym = V.getAsSymbol();
55 if (Sym)
56 return addTaint(State, Sym, Kind);
57
58 // If the SVal represents a structure, try to mass-taint all values within the
59 // structure. For now it only works efficiently on lazy compound values that
60 // were conjured during a conservative evaluation of a function - either as
61 // return values of functions that return structures or arrays by value, or as
62 // values of structures or arrays passed into the function by reference,
63 // directly or through pointer aliasing. Such lazy compound values are
64 // characterized by having exactly one binding in their captured store within
65 // their parent region, which is a conjured symbol default-bound to the base
66 // region of the parent region.
67 if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
68 if (std::optional<SVal> binding =
69 State->getStateManager().getStoreManager().getDefaultBinding(
70 *LCV)) {
71 if (SymbolRef Sym = binding->getAsSymbol())
72 return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
73 }
74 }
75
76 const MemRegion *R = V.getAsRegion();
77 return addTaint(State, R, Kind);
78}
79
81 TaintTagType Kind) {
82 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
83 return addTaint(State, SR->getSymbol(), Kind);
84 return State;
85}
86
88 TaintTagType Kind) {
89 // If this is a symbol cast, remove the cast before adding the taint. Taint
90 // is cast agnostic.
91 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
92 Sym = SC->getOperand();
93
94 ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
95 assert(NewState);
96 return NewState;
97}
98
100 SymbolRef Sym = V.getAsSymbol();
101 if (Sym)
102 return removeTaint(State, Sym);
103
104 const MemRegion *R = V.getAsRegion();
105 return removeTaint(State, R);
106}
107
109 if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
110 return removeTaint(State, SR->getSymbol());
111 return State;
112}
113
115 // If this is a symbol cast, remove the cast before adding the taint. Taint
116 // is cast agnostic.
117 while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
118 Sym = SC->getOperand();
119
120 ProgramStateRef NewState = State->remove<TaintMap>(Sym);
121 assert(NewState);
122 return NewState;
123}
124
126 SymbolRef ParentSym,
127 const SubRegion *SubRegion,
128 TaintTagType Kind) {
129 // Ignore partial taint if the entire parent symbol is already tainted.
130 if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
131 if (*T == Kind)
132 return State;
133
134 // Partial taint applies if only a portion of the symbol is tainted.
136 return addTaint(State, ParentSym, Kind);
137
138 const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
139 TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
140 TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
141
142 Regs = F.add(Regs, SubRegion, Kind);
143 ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
144 assert(NewState);
145 return NewState;
146}
147
149 const LocationContext *LCtx, TaintTagType Kind) {
150 return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true)
151 .empty();
152}
153
155 return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true)
156 .empty();
157}
158
160 TaintTagType K) {
161 return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true)
162 .empty();
163}
164
166 return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true)
167 .empty();
168}
169
170std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
171 const Stmt *S,
172 const LocationContext *LCtx,
173 TaintTagType Kind) {
174 return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false);
175}
176
177std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V,
178 TaintTagType Kind) {
179 return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false);
180}
181
182std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
183 SymbolRef Sym,
184 TaintTagType Kind) {
185 return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false);
186}
187
188std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State,
189 const MemRegion *Reg,
190 TaintTagType Kind) {
191 return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false);
192}
193
195 const Stmt *S,
196 const LocationContext *LCtx,
197 TaintTagType Kind,
198 bool returnFirstOnly) {
199 SVal val = State->getSVal(S, LCtx);
200 return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly);
201}
202
204 SVal V, TaintTagType Kind,
205 bool returnFirstOnly) {
206 if (SymbolRef Sym = V.getAsSymbol())
207 return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
208 if (const MemRegion *Reg = V.getAsRegion())
209 return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
210
211 if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
212 StoreManager &StoreMgr = State->getStateManager().getStoreManager();
213 if (auto DefaultVal = StoreMgr.getDefaultBinding(*LCV)) {
214 return getTaintedSymbolsImpl(State, *DefaultVal, Kind, returnFirstOnly);
215 }
216 }
217
218 return {};
219}
220
222 const MemRegion *Reg,
223 TaintTagType K,
224 bool returnFirstOnly) {
225 std::vector<SymbolRef> TaintedSymbols;
226 if (!Reg)
227 return TaintedSymbols;
228
229 // Element region (array element) is tainted if the offset is tainted.
230 if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
231 std::vector<SymbolRef> TaintedIndex =
232 getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly);
233 llvm::append_range(TaintedSymbols, TaintedIndex);
234 if (returnFirstOnly && !TaintedSymbols.empty())
235 return TaintedSymbols; // return early if needed
236 }
237
238 // Symbolic region is tainted if the corresponding symbol is tainted.
239 if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) {
240 std::vector<SymbolRef> TaintedRegions =
241 getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly);
242 llvm::append_range(TaintedSymbols, TaintedRegions);
243 if (returnFirstOnly && !TaintedSymbols.empty())
244 return TaintedSymbols; // return early if needed
245 }
246
247 // Any subregion (including Element and Symbolic regions) is tainted if its
248 // super-region is tainted.
249 if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) {
250 std::vector<SymbolRef> TaintedSubRegions =
251 getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly);
252 llvm::append_range(TaintedSymbols, TaintedSubRegions);
253 if (returnFirstOnly && !TaintedSymbols.empty())
254 return TaintedSymbols; // return early if needed
255 }
256
257 return TaintedSymbols;
258}
259
261 SymbolRef Sym,
262 TaintTagType Kind,
263 bool returnFirstOnly) {
264 std::vector<SymbolRef> TaintedSymbols;
265 if (!Sym)
266 return TaintedSymbols;
267
268 // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
269 if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
270 Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
271 return {};
272 }
273
274 // Traverse all the symbols this symbol depends on to see if any are tainted.
275 for (SymbolRef SubSym : Sym->symbols()) {
276 if (!isa<SymbolData>(SubSym))
277 continue;
278
279 if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) {
280 if (*Tag == Kind) {
281 TaintedSymbols.push_back(SubSym);
282 if (returnFirstOnly)
283 return TaintedSymbols; // return early if needed
284 }
285 }
286
287 if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) {
288 // If this is a SymbolDerived with a tainted parent, it's also tainted.
289 std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl(
290 State, SD->getParentSymbol(), Kind, returnFirstOnly);
291 llvm::append_range(TaintedSymbols, TaintedParents);
292 if (returnFirstOnly && !TaintedSymbols.empty())
293 return TaintedSymbols; // return early if needed
294
295 // If this is a SymbolDerived with the same parent symbol as another
296 // tainted SymbolDerived and a region that's a sub-region of that
297 // tainted symbol, it's also tainted.
298 if (const TaintedSubRegions *Regs =
299 State->get<DerivedSymTaint>(SD->getParentSymbol())) {
300 const TypedValueRegion *R = SD->getRegion();
301 for (auto I : *Regs) {
302 // FIXME: The logic to identify tainted regions could be more
303 // complete. For example, this would not currently identify
304 // overlapping fields in a union as tainted. To identify this we can
305 // check for overlapping/nested byte offsets.
306 if (Kind == I.second && R->isSubRegionOf(I.first)) {
307 TaintedSymbols.push_back(SD->getParentSymbol());
308 if (returnFirstOnly && !TaintedSymbols.empty())
309 return TaintedSymbols; // return early if needed
310 }
311 }
312 }
313 }
314
315 // If memory region is tainted, data is also tainted.
316 if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) {
317 std::vector<SymbolRef> TaintedRegions =
318 getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly);
319 llvm::append_range(TaintedSymbols, TaintedRegions);
320 if (returnFirstOnly && !TaintedSymbols.empty())
321 return TaintedSymbols; // return early if needed
322 }
323
324 // If this is a SymbolCast from a tainted value, it's also tainted.
325 if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) {
326 std::vector<SymbolRef> TaintedCasts =
327 getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly);
328 llvm::append_range(TaintedSymbols, TaintedCasts);
329 if (returnFirstOnly && !TaintedSymbols.empty())
330 return TaintedSymbols; // return early if needed
331 }
332 }
333 return TaintedSymbols;
334}
#define V(N, I)
Definition: ASTContext.h:3443
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map type Name and registers the factory for such maps in the program state,...
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
Stmt - This represents one statement.
Definition: Stmt.h:84
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1199
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1377
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:56
virtual std::optional< SVal > getDefaultBinding(Store store, const MemRegion *R)=0
Return the default value bound to a region in a given store.
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:446
bool isSubRegionOf(const MemRegion *R) const override
Check if the region is a subregion of the given region.
Definition: MemRegion.cpp:140
Symbolic value.
Definition: SymExpr.h:30
llvm::iterator_range< symbol_iterator > symbols() const
Definition: SymExpr.h:87
virtual unsigned computeComplexity() const =0
Represents a cast expression.
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:780
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:535
While nonloc::CompoundVal covers a few simple use cases, nonloc::LazyCompoundVal is a more performant...
Definition: SVals.h:389
std::vector< SymbolRef > getTaintedSymbolsImpl(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind, bool returnFirstOnly)
Definition: Taint.cpp:194
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
Definition: Taint.cpp:46
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
Definition: Taint.cpp:170
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
Definition: Taint.cpp:148
ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
Definition: Taint.cpp:99
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
ProgramStateRef addPartialTaint(ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion, TaintTagType Kind=TaintTagGeneric)
Create a new state in a which a sub-region of a given symbol is tainted.
Definition: Taint.cpp:125
LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State)
Definition: Taint.cpp:42
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T