clang 20.0.0git
DereferenceChecker.cpp
Go to the documentation of this file.
1//===-- DereferenceChecker.cpp - Null dereference checker -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines NullDerefChecker, a builtin check in ExprEngine that performs
10// checks for null pointers at loads and stores.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/AST/ExprObjC.h"
23#include "llvm/ADT/SmallString.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace clang;
27using namespace ento;
28
// The checker class is file-local: it is declared inside an anonymous
// namespace.
29namespace {
// DereferenceChecker subscribes to the check::Location and check::Bind
// callbacks and is able to dispatch ImplicitNullDerefEvent.
30class DereferenceChecker
31 : public Checker< check::Location,
32 check::Bind,
33 EventDispatcher<ImplicitNullDerefEvent> > {
// The kinds of bad dereference that reportBug() distinguishes.
34 enum DerefKind { NullPointer, UndefinedPointerValue, AddressOfLabel };
35
// One BugType per DerefKind.
// NOTE(review): listing lines 38 and 40 are absent from this extract, so the
// BT_Undef and BT_Label initializers are visibly unterminated. Presumably each
// missing line supplies the category argument and the closing brace, as the
// BT_Null line does -- confirm against the upstream file.
36 BugType BT_Null{this, "Dereference of null pointer", categories::LogicError};
37 BugType BT_Undef{this, "Dereference of undefined pointer value",
39 BugType BT_Label{this, "Dereference of the address of a label",
41
// Emits a report of kind K for statement S in the given state.
42 void reportBug(DerefKind K, ProgramStateRef State, const Stmt *S,
43 CheckerContext &C) const;
44
// Decides, from the type of E, whether a report should be suppressed.
45 bool suppressReport(CheckerContext &C, const Expr *E) const;
46
47public:
// Checker callbacks.
48 void checkLocation(SVal location, bool isLoad, const Stmt* S,
49 CheckerContext &C) const;
50 void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
51
// Writes a description of the dereferenced expression Ex to os.
// NOTE(review): listing line 53 is absent; the definition and every call site
// use a 'Ranges' argument in second position, so the missing line presumably
// declares that parameter -- confirm against the upstream file.
52 static void AddDerefSource(raw_ostream &os,
54 const Expr *Ex, const ProgramState *state,
55 const LocationContext *LCtx,
56 bool loadedFrom = false);
57
// Checker option; when set, suppressReport() suppresses every report whose
// expression type carries an address-space qualifier. It is assigned in
// ento::registerDereferenceChecker().
58 bool SuppressAddressSpaces = false;
59};
60} // end anonymous namespace
61
// Appends to 'os' a parenthesized note naming the variable, field or ivar that
// the expression 'Ex' refers to, and pushes a matching source range onto
// 'Ranges'. Expression kinds other than DeclRefExpr, MemberExpr and
// ObjCIvarRefExpr produce no output.
//
// 'loadedFrom' selects the wording: "loaded from" when true, otherwise "from"
// (variables) or "via" (fields and ivars).
// 'state' and 'LCtx' are not referenced by any of the visible lines.
//
// NOTE(review): listing line 64 is absent; it presumably declares the 'Ranges'
// parameter used below -- confirm against the upstream file.
62void
63DereferenceChecker::AddDerefSource(raw_ostream &os,
65 const Expr *Ex,
66 const ProgramState *state,
67 const LocationContext *LCtx,
68 bool loadedFrom) {
// Look through parentheses and lvalue casts before classifying the expression.
69 Ex = Ex->IgnoreParenLValueCasts();
70 switch (Ex->getStmtClass()) {
71 default:
72 break;
73 case Stmt::DeclRefExprClass: {
74 const DeclRefExpr *DR = cast<DeclRefExpr>(Ex);
// Only references to variables are described; other declarations are skipped.
75 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
76 os << " (" << (loadedFrom ? "loaded from" : "from")
77 << " variable '" << VD->getName() << "')";
78 Ranges.push_back(DR->getSourceRange());
79 }
80 break;
81 }
82 case Stmt::MemberExprClass: {
83 const MemberExpr *ME = cast<MemberExpr>(Ex);
84 os << " (" << (loadedFrom ? "loaded from" : "via")
85 << " field '" << ME->getMemberNameInfo() << "')";
// NOTE(review): listing line 86 is absent; it presumably defines the source
// location 'L' of the member name that is used on the next line -- confirm.
87 Ranges.push_back(SourceRange(L, L));
88 break;
89 }
90 case Stmt::ObjCIvarRefExprClass: {
91 const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Ex);
92 os << " (" << (loadedFrom ? "loaded from" : "via")
93 << " ivar '" << IV->getDecl()->getName() << "')";
// NOTE(review): listing line 94 is absent; it presumably defines the source
// location 'L' of the ivar reference that is used on the next line -- confirm.
95 Ranges.push_back(SourceRange(L, L));
96 break;
97 }
98 }
99}
100
101static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){
102 const Expr *E = nullptr;
103
104 // Walk through lvalue casts to get the original expression
105 // that syntactically caused the load.
106 if (const Expr *expr = dyn_cast<Expr>(S))
107 E = expr->IgnoreParenLValueCasts();
108
109 if (IsBind) {
110 const VarDecl *VD;
111 const Expr *Init;
112 std::tie(VD, Init) = parseAssignment(S);
113 if (VD && Init)
114 E = Init;
115 }
116 return E;
117}
118
// Returns true when a dereference report for expression E should be dropped
// because of the address space of E's type.
//
// Visible decision order:
//   1. no address-space qualifier on the type  -> false (report normally);
//   2. SuppressAddressSpaces option is set     -> true;
//   3. target arch is x86 or x86_64 and the value being switched on is
//      256, 257 or 258                          -> true;
//   4. anything else                            -> false.
// E is dereferenced unconditionally, so callers must pass a non-null
// expression.
119bool DereferenceChecker::suppressReport(CheckerContext &C,
120 const Expr *E) const {
121 // Do not report dereferences on memory that use address space #256, #257,
122 // and #258. Those address spaces are used when dereferencing address spaces
123 // relative to the GS, FS, and SS segments on x86/x86-64 targets.
124 // Dereferencing a null pointer in these address spaces is not defined
125 // as an error. All other null dereferences in other address spaces
126 // are defined as an error unless explicitly defined.
127 // See https://clang.llvm.org/docs/LanguageExtensions.html, the section
128 // "X86/X86-64 Language Extensions"
129
130 QualType Ty = E->getType();
131 if (!Ty.hasAddressSpace())
132 return false;
133 if (SuppressAddressSpaces)
134 return true;
135
136 const llvm::Triple::ArchType Arch =
137 C.getASTContext().getTargetInfo().getTriple().getArch();
138
139 if ((Arch == llvm::Triple::x86) || (Arch == llvm::Triple::x86_64)) {
// NOTE(review): listing line 140 is absent; it presumably opens a switch over
// the target address space of E's type, which the case labels below belong
// to -- confirm against the upstream file.
141 case 256:
142 case 257:
143 case 258:
144 return true;
145 }
146 }
147 return false;
148}
149
150static bool isDeclRefExprToReference(const Expr *E) {
151 if (const auto *DRE = dyn_cast<DeclRefExpr>(E))
152 return DRE->getDecl()->getType()->isReferenceType();
153 return false;
154}
155
// Emits a path-sensitive bug report for a bad dereference.
//
//   K      which kind of dereference is being reported; selects the BugType
//          and the two message suffixes.
//   State  program state used to generate the error node.
//   S      statement the report is about; its statement class selects the
//          message wording. It is dereferenced unconditionally.
//   C      checker context used to create the error node and emit the report.
//
// If no error node can be generated, nothing is reported.
156void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State,
157 const Stmt *S, CheckerContext &C) const {
158 const BugType *BT = nullptr;
// DerefStr1 is appended to "Array access ..." messages, DerefStr2 to
// "Access to field/instance variable ..." messages.
159 llvm::StringRef DerefStr1;
160 llvm::StringRef DerefStr2;
161 switch (K) {
162 case DerefKind::NullPointer:
163 BT = &BT_Null;
164 DerefStr1 = " results in a null pointer dereference";
165 DerefStr2 = " results in a dereference of a null pointer";
166 break;
167 case DerefKind::UndefinedPointerValue:
168 BT = &BT_Undef;
169 DerefStr1 = " results in an undefined pointer dereference";
170 DerefStr2 = " results in a dereference of an undefined pointer value";
171 break;
172 case DerefKind::AddressOfLabel:
173 BT = &BT_Label;
// Note: DerefStr1 here is the same text as in the UndefinedPointerValue case.
174 DerefStr1 = " results in an undefined pointer dereference";
175 DerefStr2 = " results in a dereference of an address of a label";
176 break;
177 };
178
179 // Generate an error node.
180 ExplodedNode *N = C.generateErrorNode(State);
181 if (!N)
182 return;
183
// NOTE(review): listing lines 184 and 187 are absent; they presumably declare
// the string buffer 'buf' that backs 'os' and the 'Ranges' container filled by
// AddDerefSource() -- confirm against the upstream file.
185 llvm::raw_svector_ostream os(buf);
186
188
// Build the message from the syntactic form of the dereference.
189 switch (S->getStmtClass()) {
190 case Stmt::ArraySubscriptExprClass: {
191 os << "Array access";
192 const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(S);
193 AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(),
194 State.get(), N->getLocationContext());
195 os << DerefStr1;
196 break;
197 }
198 case Stmt::ArraySectionExprClass: {
199 os << "Array access";
200 const ArraySectionExpr *AE = cast<ArraySectionExpr>(S);
201 AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(),
202 State.get(), N->getLocationContext());
203 os << DerefStr1;
204 break;
205 }
206 case Stmt::UnaryOperatorClass: {
// Unary operators start from the bug type's own description.
207 os << BT->getDescription();
208 const UnaryOperator *U = cast<UnaryOperator>(S);
209 AddDerefSource(os, Ranges, U->getSubExpr()->IgnoreParens(),
210 State.get(), N->getLocationContext(), true);
211 break;
212 }
213 case Stmt::MemberExprClass: {
214 const MemberExpr *M = cast<MemberExpr>(S);
// Other member accesses write nothing, so the report falls back to the bug
// type's description below.
215 if (M->isArrow() || isDeclRefExprToReference(M->getBase())) {
216 os << "Access to field '" << M->getMemberNameInfo() << "'" << DerefStr2;
217 AddDerefSource(os, Ranges, M->getBase()->IgnoreParenCasts(),
218 State.get(), N->getLocationContext(), true);
219 }
220 break;
221 }
222 case Stmt::ObjCIvarRefExprClass: {
223 const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(S);
224 os << "Access to instance variable '" << *IV->getDecl() << "'" << DerefStr2;
225 AddDerefSource(os, Ranges, IV->getBase()->IgnoreParenCasts(),
226 State.get(), N->getLocationContext(), true);
227 break;
228 }
229 default:
230 break;
231 }
232
// Use the custom message when one was built, otherwise the bug type's
// description.
233 auto report = std::make_unique<PathSensitiveBugReport>(
234 *BT, buf.empty() ? BT->getDescription() : buf.str(), N);
235
// NOTE(review): listing lines 236 and 238 are absent. Line 238 presumably opens
// the for-loop whose iterator declaration continues on the next line; line 236
// presumably registers value tracking on the report -- confirm against the
// upstream file.
237
239 I = Ranges.begin(), E = Ranges.end(); I!=E; ++I)
240 report->addRange(*I);
241
242 C.emitReport(std::move(report));
243}
244
// Location callback: inspects the location 'l' accessed by statement S.
//
//   - An undefined location is reported as UndefinedPointerValue (unless
//     suppressed) and processing stops.
//   - A location that can only be null is reported as NullPointer (unless
//     suppressed).
//   - A location that may be null (or a suppressed definite null) produces a
//     sink node and an ImplicitNullDerefEvent for other listeners.
//   - Finally a transition to the not-null state is added.
245void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
246 CheckerContext &C) const {
247 // Check for dereference of an undefined value.
248 if (l.isUndef()) {
249 const Expr *DerefExpr = getDereferenceExpr(S);
250 if (!suppressReport(C, DerefExpr))
251 reportBug(DerefKind::UndefinedPointerValue, C.getState(), DerefExpr, C);
252 return;
253 }
254
// NOTE(review): listing line 255 is absent; it presumably defines 'location'
// from 'l' (now known to be defined), since 'location' is used below without a
// visible declaration -- confirm against the upstream file.
256
257 // Check for null dereferences.
258 if (!isa<Loc>(location))
259 return;
260
261 ProgramStateRef state = C.getState();
262
// Split the state on whether 'location' is non-null / null.
263 ProgramStateRef notNullState, nullState;
264 std::tie(notNullState, nullState) = state->assume(location);
265
266 if (nullState) {
267 if (!notNullState) {
268 // We know that 'location' can only be null. This is what
269 // we call an "explicit" null dereference.
270 const Expr *expr = getDereferenceExpr(S);
// When the report is suppressed, control falls through to the implicit-null
// handling below instead of returning.
271 if (!suppressReport(C, expr)) {
272 reportBug(DerefKind::NullPointer, nullState, expr, C);
273 return;
274 }
275 }
276
277 // Otherwise, we have the case where the location could either be
278 // null or not-null. Record the error node as an "implicit" null
279 // dereference.
280 if (ExplodedNode *N = C.generateSink(nullState, C.getPredecessor())) {
281 ImplicitNullDerefEvent event = {l, isLoad, N, &C.getBugReporter(),
282 /*IsDirectDereference=*/true};
283 dispatchEvent(event);
284 }
285 }
286
287 // From this point forward, we know that the location is not null.
288 C.addTransition(notNullState);
289}
290
291void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S,
292 CheckerContext &C) const {
293 // If we're binding to a reference, check if the value is known to be null.
294 if (V.isUndef())
295 return;
296
297 // One should never write to label addresses.
298 if (auto Label = L.getAs<loc::GotoLabel>()) {
299 reportBug(DerefKind::AddressOfLabel, C.getState(), S, C);
300 return;
301 }
302
303 const MemRegion *MR = L.getAsRegion();
304 const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(MR);
305 if (!TVR)
306 return;
307
308 if (!TVR->getValueType()->isReferenceType())
309 return;
310
311 ProgramStateRef State = C.getState();
312
313 ProgramStateRef StNonNull, StNull;
314 std::tie(StNonNull, StNull) = State->assume(V.castAs<DefinedOrUnknownSVal>());
315
316 if (StNull) {
317 if (!StNonNull) {
318 const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true);
319 if (!suppressReport(C, expr)) {
320 reportBug(DerefKind::NullPointer, StNull, expr, C);
321 return;
322 }
323 }
324
325 // At this point the value could be either null or non-null.
326 // Record this as an "implicit" null dereference.
327 if (ExplodedNode *N = C.generateSink(StNull, C.getPredecessor())) {
328 ImplicitNullDerefEvent event = {V, /*isLoad=*/true, N,
329 &C.getBugReporter(),
330 /*IsDirectDereference=*/true};
331 dispatchEvent(event);
332 }
333 }
334
335 // Unlike a regular null dereference, initializing a reference with a
336 // dereferenced null pointer does not actually cause a runtime exception in
337 // Clang's implementation of references.
338 //
339 // int &r = *p; // safe??
340 // if (p != NULL) return; // uh-oh
341 // r = 5; // trap here
342 //
343 // The standard says this is invalid as soon as we try to create a "null
344 // reference" (there is no such thing), but turning this into an assumption
345 // that 'p' is never null will not match our actual runtime behavior.
346 // So we do not record this assumption, allowing us to warn on the last line
347 // of this example.
348 //
349 // We do need to add a transition because we may have generated a sink for
350 // the "implicit" null dereference.
351 C.addTransition(State, this);
352}
353
354void ento::registerDereferenceChecker(CheckerManager &mgr) {
355 auto *Chk = mgr.registerChecker<DereferenceChecker>();
356 Chk->SuppressAddressSpaces = mgr.getAnalyzerOptions().getCheckerBooleanOption(
357 mgr.getCurrentCheckerName(), "SuppressAddressSpaces");
358}
359
360bool ento::shouldRegisterDereferenceChecker(const CheckerManager &mgr) {
361 return true;
362}
#define V(N, I)
Definition: ASTContext.h:3443
Expr * E
static const Expr * getDereferenceExpr(const Stmt *S, bool IsBind=false)
static bool isDeclRefExprToReference(const Expr *E)
std::string Label
bool getCheckerBooleanOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Interprets an option's string value as a boolean.
This class represents BOTH the OpenMP Array Section and OpenACC 'subarray', with a boolean differenti...
Definition: Expr.h:6986
Expr * getBase()
Get base of the array section.
Definition: Expr.h:7052
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2718
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1265
ValueDecl * getDecl()
Definition: Expr.h:1333
This represents one expression.
Definition: Expr.h:110
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3095
Expr * IgnoreParenLValueCasts() LLVM_READONLY
Skip past any parentheses and lvalue casts which might surround this expression until reaching a fixe...
Definition: Expr.cpp:3107
QualType getType() const
Definition: Expr.h:142
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
SourceLocation getMemberLoc() const
getMemberLoc - Return the location of the "member", in X->F, it is the location of 'F'.
Definition: Expr.h:3425
Expr * getBase() const
Definition: Expr.h:3313
DeclarationNameInfo getMemberNameInfo() const
Retrieve the member declaration name info.
Definition: Expr.h:3413
bool isArrow() const
Definition: Expr.h:3420
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
ObjCIvarRefExpr - A reference to an ObjC instance variable.
Definition: ExprObjC.h:549
SourceLocation getLocation() const
Definition: ExprObjC.h:592
ObjCIvarDecl * getDecl()
Definition: ExprObjC.h:579
const Expr * getBase() const
Definition: ExprObjC.h:583
A (possibly-)qualified type.
Definition: Type.h:929
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:8057
bool hasAddressSpace() const
Check if this type has any address space qualifier.
Definition: Type.h:8052
Encodes a location in the source.
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
StmtClass getStmtClass() const
Definition: Stmt.h:1380
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:333
bool isReferenceType() const
Definition: Type.h:8204
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Definition: Expr.h:2232
Represents a variable declaration or definition.
Definition: Decl.h:882
StringRef getDescription() const
Definition: BugType.h:48
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
CheckerNameRef getCurrentCheckerName() const
const LocationContext * getLocationContext() const
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
ProgramState - This class encapsulates:
Definition: ProgramState.h:71
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:56
bool isUndef() const
Definition: SVals.h:107
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition: SVals.h:87
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:83
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:535
virtual QualType getValueType() const =0
Defines the clang::TargetInfo interface.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
const Expr * getDerefExpr(const Stmt *S)
Given that expression S represents a pointer that would be dereferenced, try to find a sub-expression...
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
std::pair< const clang::VarDecl *, const clang::Expr * > parseAssignment(const Stmt *S)
The JSON file list parser is used to communicate input to InstallAPI.
unsigned toTargetAddressSpace(LangAS AS)
Definition: AddressSpaces.h:81
We dereferenced a location that may be null.
Definition: Checker.h:548