clang 20.0.0git
SimpleSValBuilder.cpp
Go to the documentation of this file.
1// SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10//
11//===----------------------------------------------------------------------===//
12
18#include <optional>
19
20using namespace clang;
21using namespace ento;
22
23namespace {
24class SimpleSValBuilder : public SValBuilder {
25
26 // Query the constraint manager whether the SVal has only one possible
27 // (integer) value. If that is the case, the value is returned. Otherwise,
28 // returns NULL.
29 // This is an implementation detail. Checkers should use `getKnownValue()`
30 // instead.
31 static const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
32
33 // Helper function that returns the value stored in a nonloc::ConcreteInt or
34 // loc::ConcreteInt.
35 static const llvm::APSInt *getConcreteValue(SVal V);
36
37 // With one `simplifySValOnce` call, a compound symbols might collapse to
38 // simpler symbol tree that is still possible to further simplify. Thus, we
39 // do the simplification on a new symbol tree until we reach the simplest
40 // form, i.e. the fixpoint.
41 // Consider the following symbol `(b * b) * b * b` which has this tree:
42 // *
43 // / \
44 // * b
45 // / \
46 // / b
47 // (b * b)
48 // Now, if the `b * b == 1` new constraint is added then during the first
49 // iteration we have the following transformations:
50 // * *
51 // / \ / \
52 // * b --> b b
53 // / \
54 // / b
55 // 1
56 // We need another iteration to reach the final result `1`.
57 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
58
59 // Recursively descends into symbolic expressions and replaces symbols
60 // with their known values (in the sense of the getConstValue() method).
61 // We traverse the symbol tree and query the constraint values for the
62 // sub-trees and if a value is a constant we do the constant folding.
63 SVal simplifySValOnce(ProgramStateRef State, SVal V);
64
65public:
66 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
67 ProgramStateManager &stateMgr)
68 : SValBuilder(alloc, context, stateMgr) {}
69 ~SimpleSValBuilder() override {}
70
72 NonLoc lhs, NonLoc rhs, QualType resultTy) override;
74 Loc lhs, Loc rhs, QualType resultTy) override;
76 Loc lhs, NonLoc rhs, QualType resultTy) override;
77
78 /// Evaluates a given SVal by recursively evaluating and
79 /// simplifying the children SVals. If the SVal has only one possible
80 /// (integer) value, that value is returned. Otherwise, returns NULL.
81 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
82
83 /// Evaluates a given SVal by recursively evaluating and simplifying the
84 /// children SVals, then returns its minimal possible (integer) value. If the
85 /// constraint manager cannot provide a meaningful answer, this returns NULL.
86 const llvm::APSInt *getMinValue(ProgramStateRef state, SVal V) override;
87
88 /// Evaluates a given SVal by recursively evaluating and simplifying the
89 /// children SVals, then returns its maximal possible (integer) value. If the
90 /// constraint manager cannot provide a meaningful answer, this returns NULL.
91 const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal V) override;
92
93 SVal simplifySVal(ProgramStateRef State, SVal V) override;
94
95 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
96 const llvm::APSInt &RHS, QualType resultTy);
97};
98} // end anonymous namespace
99
100SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
101 ASTContext &context,
102 ProgramStateManager &stateMgr) {
103 return new SimpleSValBuilder(alloc, context, stateMgr);
104}
105
106// Checks if the negation the value and flipping sign preserve
107// the semantics on the operation in the resultType
108static bool isNegationValuePreserving(const llvm::APSInt &Value,
109 APSIntType ResultType) {
110 const unsigned ValueBits = Value.getSignificantBits();
111 if (ValueBits == ResultType.getBitWidth()) {
112 // The value is the lowest negative value that is representable
113 // in signed integer with bitWith of result type. The
114 // negation is representable if resultType is unsigned.
115 return ResultType.isUnsigned();
116 }
117
118 // If resultType bitWith is higher that number of bits required
119 // to represent RHS, the sign flip produce same value.
120 return ValueBits < ResultType.getBitWidth();
121}
122
123//===----------------------------------------------------------------------===//
124// Transfer function for binary operators.
125//===----------------------------------------------------------------------===//
126
127SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
129 const llvm::APSInt &RHS,
130 QualType resultTy) {
131 bool isIdempotent = false;
132
133 // Check for a few special cases with known reductions first.
134 switch (op) {
135 default:
136 // We can't reduce this case; just treat it normally.
137 break;
138 case BO_Mul:
139 // a*0 and a*1
140 if (RHS == 0)
141 return makeIntVal(0, resultTy);
142 else if (RHS == 1)
143 isIdempotent = true;
144 break;
145 case BO_Div:
146 // a/0 and a/1
147 if (RHS == 0)
148 // This is also handled elsewhere.
149 return UndefinedVal();
150 else if (RHS == 1)
151 isIdempotent = true;
152 break;
153 case BO_Rem:
154 // a%0 and a%1
155 if (RHS == 0)
156 // This is also handled elsewhere.
157 return UndefinedVal();
158 else if (RHS == 1)
159 return makeIntVal(0, resultTy);
160 break;
161 case BO_Add:
162 case BO_Sub:
163 case BO_Shl:
164 case BO_Shr:
165 case BO_Xor:
166 // a+0, a-0, a<<0, a>>0, a^0
167 if (RHS == 0)
168 isIdempotent = true;
169 break;
170 case BO_And:
171 // a&0 and a&(~0)
172 if (RHS == 0)
173 return makeIntVal(0, resultTy);
174 else if (RHS.isAllOnes())
175 isIdempotent = true;
176 break;
177 case BO_Or:
178 // a|0 and a|(~0)
179 if (RHS == 0)
180 isIdempotent = true;
181 else if (RHS.isAllOnes()) {
182 const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
183 return nonloc::ConcreteInt(Result);
184 }
185 break;
186 }
187
188 // Idempotent ops (like a*1) can still change the type of an expression.
189 // Wrap the LHS up in a NonLoc again and let evalCast do the
190 // dirty work.
191 if (isIdempotent)
192 return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
193
194 // If we reach this point, the expression cannot be simplified.
195 // Make a SymbolVal for the entire expression, after converting the RHS.
196 const llvm::APSInt *ConvertedRHS = &RHS;
198 // We're looking for a type big enough to compare the symbolic value
199 // with the given constant.
200 // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
201 ASTContext &Ctx = getContext();
202 QualType SymbolType = LHS->getType();
203 uint64_t ValWidth = RHS.getBitWidth();
204 uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
205
206 if (ValWidth < TypeWidth) {
207 // If the value is too small, extend it.
208 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
209 } else if (ValWidth == TypeWidth) {
210 // If the value is signed but the symbol is unsigned, do the comparison
211 // in unsigned space. [C99 6.3.1.8]
212 // (For the opposite case, the value is already unsigned.)
213 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
214 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
215 }
216 } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) {
217 // Change a+(-N) into a-N, and a-(-N) into a+N
218 // Adjust addition/subtraction of negative value, to
219 // subtraction/addition of the negated value.
220 APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy);
221 if (isNegationValuePreserving(RHS, resultIntTy)) {
222 ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS));
223 op = (op == BO_Add) ? BO_Sub : BO_Add;
224 } else {
225 ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
226 }
227 } else
228 ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
229
230 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
231}
232
233// See if Sym is known to be a relation Rel with Bound.
235 llvm::APSInt Bound, ProgramStateRef State) {
236 SValBuilder &SVB = State->getStateManager().getSValBuilder();
237 SVal Result =
238 SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
240 if (auto DV = Result.getAs<DefinedSVal>()) {
241 return !State->assume(*DV, false);
242 }
243 return false;
244}
245
246// See if Sym is known to be within [min/4, max/4], where min and max
247// are the bounds of the symbol's integral type. With such symbols,
248// some manipulations can be performed without the risk of overflow.
249// assume() doesn't cause infinite recursion because we should be dealing
250// with simpler symbols on every recursive call.
252 ProgramStateRef State) {
253 SValBuilder &SVB = State->getStateManager().getSValBuilder();
255
256 QualType T = Sym->getType();
258 "This only works with signed integers!");
259 APSIntType AT = BV.getAPSIntType(T);
260
261 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
262 return isInRelation(BO_LE, Sym, Max, State) &&
263 isInRelation(BO_GE, Sym, Min, State);
264}
265
266// Same for the concrete integers: see if I is within [min/4, max/4].
267static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
268 APSIntType AT(I);
269 assert(!AT.isUnsigned() &&
270 "This only works with signed integers!");
271
272 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
273 return (I <= Max) && (I >= -Max);
274}
275
276static std::pair<SymbolRef, llvm::APSInt>
278 if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
279 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
280 return std::make_pair(SymInt->getLHS(),
281 (SymInt->getOpcode() == BO_Add) ?
282 (SymInt->getRHS()) :
283 (-SymInt->getRHS()));
284
285 // Fail to decompose: "reduce" the problem to the "$x + 0" case.
286 return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
287}
288
289// Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
290// same signed integral type and no overflows occur (which should be checked
291// by the caller).
294 SymbolRef LSym, llvm::APSInt LInt,
295 SymbolRef RSym, llvm::APSInt RInt) {
296 SValBuilder &SVB = State->getStateManager().getSValBuilder();
298 SymbolManager &SymMgr = SVB.getSymbolManager();
299
300 QualType SymTy = LSym->getType();
301 assert(SymTy == RSym->getType() &&
302 "Symbols are not of the same type!");
303 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
304 "Integers are not of the same type as symbols!");
305 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
306 "Integers are not of the same type as symbols!");
307
308 QualType ResultTy;
310 ResultTy = SVB.getConditionType();
311 else if (BinaryOperator::isAdditiveOp(Op))
312 ResultTy = SymTy;
313 else
314 llvm_unreachable("Operation not suitable for unchecked rearrangement!");
315
316 if (LSym == RSym)
317 return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
318 nonloc::ConcreteInt(RInt), ResultTy)
319 .castAs<NonLoc>();
320
321 SymbolRef ResultSym = nullptr;
322 BinaryOperator::Opcode ResultOp;
323 llvm::APSInt ResultInt;
325 // Prefer comparing to a non-negative number.
326 // FIXME: Maybe it'd be better to have consistency in
327 // "$x - $y" vs. "$y - $x" because those are solver's keys.
328 if (LInt > RInt) {
329 ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
331 ResultInt = LInt - RInt; // Opposite order!
332 } else {
333 ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
334 ResultOp = Op;
335 ResultInt = RInt - LInt; // Opposite order!
336 }
337 } else {
338 ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
339 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
340 ResultOp = BO_Add;
341 // Bring back the cosmetic difference.
342 if (ResultInt < 0) {
343 ResultInt = -ResultInt;
344 ResultOp = BO_Sub;
345 } else if (ResultInt == 0) {
346 // Shortcut: Simplify "$x + 0" to "$x".
347 return nonloc::SymbolVal(ResultSym);
348 }
349 }
350 const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
351 return nonloc::SymbolVal(
352 SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
353}
354
355// Rearrange if symbol type matches the result type and if the operator is a
356// comparison operator, both symbol and constant must be within constant
357// overflow bounds.
359 SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
360 return Sym->getType() == Ty &&
362 (isWithinConstantOverflowBounds(Sym, State) &&
364}
365
366static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
368 NonLoc Rhs, QualType ResultTy) {
369 ProgramStateManager &StateMgr = State->getStateManager();
370 SValBuilder &SVB = StateMgr.getSValBuilder();
371
372 // We expect everything to be of the same type - this type.
373 QualType SingleTy;
374
375 // FIXME: After putting complexity threshold to the symbols we can always
376 // rearrange additive operations but rearrange comparisons only if
377 // option is set.
378 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
379 return std::nullopt;
380
381 SymbolRef LSym = Lhs.getAsSymbol();
382 if (!LSym)
383 return std::nullopt;
384
386 SingleTy = LSym->getType();
387 if (ResultTy != SVB.getConditionType())
388 return std::nullopt;
389 // Initialize SingleTy later with a symbol's type.
390 } else if (BinaryOperator::isAdditiveOp(Op)) {
391 SingleTy = ResultTy;
392 if (LSym->getType() != SingleTy)
393 return std::nullopt;
394 } else {
395 // Don't rearrange other operations.
396 return std::nullopt;
397 }
398
399 assert(!SingleTy.isNull() && "We should have figured out the type by now!");
400
401 // Rearrange signed symbolic expressions only
402 if (!SingleTy->isSignedIntegerOrEnumerationType())
403 return std::nullopt;
404
405 SymbolRef RSym = Rhs.getAsSymbol();
406 if (!RSym || RSym->getType() != SingleTy)
407 return std::nullopt;
408
409 BasicValueFactory &BV = State->getBasicVals();
410 llvm::APSInt LInt, RInt;
411 std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
412 std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
413 if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
414 !shouldRearrange(State, Op, RSym, RInt, SingleTy))
415 return std::nullopt;
416
417 // We know that no overflows can occur anymore.
418 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
419}
420
421SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
423 NonLoc lhs, NonLoc rhs,
424 QualType resultTy) {
425 NonLoc InputLHS = lhs;
426 NonLoc InputRHS = rhs;
427
428 // Constraints may have changed since the creation of a bound SVal. Check if
429 // the values can be simplified based on those new constraints.
430 SVal simplifiedLhs = simplifySVal(state, lhs);
431 SVal simplifiedRhs = simplifySVal(state, rhs);
432 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
433 lhs = *simplifiedLhsAsNonLoc;
434 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
435 rhs = *simplifiedRhsAsNonLoc;
436
437 // Handle trivial case where left-side and right-side are the same.
438 if (lhs == rhs)
439 switch (op) {
440 default:
441 break;
442 case BO_EQ:
443 case BO_LE:
444 case BO_GE:
445 return makeTruthVal(true, resultTy);
446 case BO_LT:
447 case BO_GT:
448 case BO_NE:
449 return makeTruthVal(false, resultTy);
450 case BO_Xor:
451 case BO_Sub:
452 if (resultTy->isIntegralOrEnumerationType())
453 return makeIntVal(0, resultTy);
454 return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
455 QualType{});
456 case BO_Or:
457 case BO_And:
458 return evalCast(lhs, resultTy, QualType{});
459 }
460
461 while (true) {
462 switch (lhs.getKind()) {
463 default:
464 return makeSymExprValNN(op, lhs, rhs, resultTy);
465 case nonloc::PointerToMemberKind: {
466 assert(rhs.getKind() == nonloc::PointerToMemberKind &&
467 "Both SVals should have pointer-to-member-type");
468 auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
469 RPTM = rhs.castAs<nonloc::PointerToMember>();
470 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
471 switch (op) {
472 case BO_EQ:
473 return makeTruthVal(LPTMD == RPTMD, resultTy);
474 case BO_NE:
475 return makeTruthVal(LPTMD != RPTMD, resultTy);
476 default:
477 return UnknownVal();
478 }
479 }
480 case nonloc::LocAsIntegerKind: {
481 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
482 switch (rhs.getKind()) {
483 case nonloc::LocAsIntegerKind:
484 // FIXME: at the moment the implementation
485 // of modeling "pointers as integers" is not complete.
487 return UnknownVal();
488 return evalBinOpLL(state, op, lhsL,
490 resultTy);
491 case nonloc::ConcreteIntKind: {
492 // FIXME: at the moment the implementation
493 // of modeling "pointers as integers" is not complete.
495 return UnknownVal();
496 // Transform the integer into a location and compare.
497 // FIXME: This only makes sense for comparisons. If we want to, say,
498 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
499 // then pack it back into a LocAsInteger.
500 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
501 // If the region has a symbolic base, pay attention to the type; it
502 // might be coming from a non-default address space. For non-symbolic
503 // regions it doesn't matter that much because such comparisons would
504 // most likely evaluate to concrete false anyway. FIXME: We might
505 // still need to handle the non-comparison case.
506 if (SymbolRef lSym = lhs.getAsLocSymbol(true))
507 BasicVals.getAPSIntType(lSym->getType()).apply(i);
508 else
509 BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
510 return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
511 }
512 default:
513 switch (op) {
514 case BO_EQ:
515 return makeTruthVal(false, resultTy);
516 case BO_NE:
517 return makeTruthVal(true, resultTy);
518 default:
519 // This case also handles pointer arithmetic.
520 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
521 }
522 }
523 }
524 case nonloc::ConcreteIntKind: {
525 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
526
527 // If we're dealing with two known constants, just perform the operation.
528 if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) {
529 llvm::APSInt RHSValue = *KnownRHSValue;
531 // We're looking for a type big enough to compare the two values.
532 // FIXME: This is not correct. char + short will result in a promotion
533 // to int. Unfortunately we have lost types by this point.
534 APSIntType CompareType = std::max(APSIntType(LHSValue),
535 APSIntType(RHSValue));
536 CompareType.apply(LHSValue);
537 CompareType.apply(RHSValue);
538 } else if (!BinaryOperator::isShiftOp(op)) {
539 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
540 IntType.apply(LHSValue);
541 IntType.apply(RHSValue);
542 }
543
544 const llvm::APSInt *Result =
545 BasicVals.evalAPSInt(op, LHSValue, RHSValue);
546 if (!Result) {
547 if (op == BO_Shl || op == BO_Shr) {
548 // FIXME: At this point the constant folding claims that the result
549 // of a bitwise shift is undefined. However, constant folding
550 // relies on the inaccurate type information that is stored in the
551 // bit size of APSInt objects, and if we reached this point, then
552 // the checker core.BitwiseShift already determined that the shift
553 // is valid (in a PreStmt callback, by querying the real type from
554 // the AST node).
555 // To avoid embarrassing false positives, let's just say that we
556 // don't know anything about the result of the shift.
557 return UnknownVal();
558 }
559 return UndefinedVal();
560 }
561
562 return nonloc::ConcreteInt(*Result);
563 }
564
565 // Swap the left and right sides and flip the operator if doing so
566 // allows us to better reason about the expression (this is a form
567 // of expression canonicalization).
568 // While we're at it, catch some special cases for non-commutative ops.
569 switch (op) {
570 case BO_LT:
571 case BO_GT:
572 case BO_LE:
573 case BO_GE:
575 [[fallthrough]];
576 case BO_EQ:
577 case BO_NE:
578 case BO_Add:
579 case BO_Mul:
580 case BO_And:
581 case BO_Xor:
582 case BO_Or:
583 std::swap(lhs, rhs);
584 continue;
585 case BO_Shr:
586 // (~0)>>a
587 if (LHSValue.isAllOnes() && LHSValue.isSigned())
588 return evalCast(lhs, resultTy, QualType{});
589 [[fallthrough]];
590 case BO_Shl:
591 // 0<<a and 0>>a
592 if (LHSValue == 0)
593 return evalCast(lhs, resultTy, QualType{});
594 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
595 case BO_Div:
596 // 0 / x == 0
597 case BO_Rem:
598 // 0 % x == 0
599 if (LHSValue == 0)
600 return makeZeroVal(resultTy);
601 [[fallthrough]];
602 default:
603 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
604 }
605 }
606 case nonloc::SymbolValKind: {
607 // We only handle LHS as simple symbols or SymIntExprs.
608 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
609
610 // LHS is a symbolic expression.
611 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
612
613 // Is this a logical not? (!x is represented as x == 0.)
614 if (op == BO_EQ && rhs.isZeroConstant()) {
615 // We know how to negate certain expressions. Simplify them here.
616
617 BinaryOperator::Opcode opc = symIntExpr->getOpcode();
618 switch (opc) {
619 default:
620 // We don't know how to negate this operation.
621 // Just handle it as if it were a normal comparison to 0.
622 break;
623 case BO_LAnd:
624 case BO_LOr:
625 llvm_unreachable("Logical operators handled by branching logic.");
626 case BO_Assign:
627 case BO_MulAssign:
628 case BO_DivAssign:
629 case BO_RemAssign:
630 case BO_AddAssign:
631 case BO_SubAssign:
632 case BO_ShlAssign:
633 case BO_ShrAssign:
634 case BO_AndAssign:
635 case BO_XorAssign:
636 case BO_OrAssign:
637 case BO_Comma:
638 llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
639 case BO_PtrMemD:
640 case BO_PtrMemI:
641 llvm_unreachable("Pointer arithmetic not handled here.");
642 case BO_LT:
643 case BO_GT:
644 case BO_LE:
645 case BO_GE:
646 case BO_EQ:
647 case BO_NE:
648 assert(resultTy->isBooleanType() ||
649 resultTy == getConditionType());
650 assert(symIntExpr->getType()->isBooleanType() ||
651 getContext().hasSameUnqualifiedType(symIntExpr->getType(),
652 getConditionType()));
653 // Negate the comparison and make a value.
655 return makeNonLoc(symIntExpr->getLHS(), opc,
656 symIntExpr->getRHS(), resultTy);
657 }
658 }
659
660 // For now, only handle expressions whose RHS is a constant.
661 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) {
662 // If both the LHS and the current expression are additive,
663 // fold their constants and try again.
665 BinaryOperator::Opcode lop = symIntExpr->getOpcode();
667 // Convert the two constants to a common type, then combine them.
668
669 // resultTy may not be the best type to convert to, but it's
670 // probably the best choice in expressions with mixed type
671 // (such as x+1U+2LL). The rules for implicit conversions should
672 // choose a reasonable type to preserve the expression, and will
673 // at least match how the value is going to be used.
674 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
675 const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
676 const llvm::APSInt &second = IntType.convert(*RHSValue);
677
678 // If the op and lop agrees, then we just need to
679 // sum the constants. Otherwise, we change to operation
680 // type if substraction would produce negative value
681 // (and cause overflow for unsigned integers),
682 // as consequence x+1U-10 produces x-9U, instead
683 // of x+4294967287U, that would be produced without this
684 // additional check.
685 const llvm::APSInt *newRHS;
686 if (lop == op) {
687 newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
688 } else if (first >= second) {
689 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
690 op = lop;
691 } else {
692 newRHS = BasicVals.evalAPSInt(BO_Sub, second, first);
693 }
694
695 assert(newRHS && "Invalid operation despite common type!");
696 rhs = nonloc::ConcreteInt(*newRHS);
697 lhs = nonloc::SymbolVal(symIntExpr->getLHS());
698 continue;
699 }
700 }
701
702 // Otherwise, make a SymIntExpr out of the expression.
703 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
704 }
705 }
706
707 // Is the RHS a constant?
708 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs))
709 return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
710
711 if (std::optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
712 return *V;
713
714 // Give up -- this is not a symbolic expression we can handle.
715 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
716 }
717 }
718 }
719}
720
722 const FieldRegion *RightFR,
724 QualType resultTy,
725 SimpleSValBuilder &SVB) {
726 // Only comparisons are meaningful here!
728 return UnknownVal();
729
730 // Next, see if the two FRs have the same super-region.
731 // FIXME: This doesn't handle casts yet, and simply stripping the casts
732 // doesn't help.
733 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
734 return UnknownVal();
735
736 const FieldDecl *LeftFD = LeftFR->getDecl();
737 const FieldDecl *RightFD = RightFR->getDecl();
738 const RecordDecl *RD = LeftFD->getParent();
739
740 // Make sure the two FRs are from the same kind of record. Just in case!
741 // FIXME: This is probably where inheritance would be a problem.
742 if (RD != RightFD->getParent())
743 return UnknownVal();
744
745 // We know for sure that the two fields are not the same, since that
746 // would have given us the same SVal.
747 if (op == BO_EQ)
748 return SVB.makeTruthVal(false, resultTy);
749 if (op == BO_NE)
750 return SVB.makeTruthVal(true, resultTy);
751
752 // Iterate through the fields and see which one comes first.
753 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
754 // members and the units in which bit-fields reside have addresses that
755 // increase in the order in which they are declared."
756 bool leftFirst = (op == BO_LT || op == BO_LE);
757 for (const auto *I : RD->fields()) {
758 if (I == LeftFD)
759 return SVB.makeTruthVal(leftFirst, resultTy);
760 if (I == RightFD)
761 return SVB.makeTruthVal(!leftFirst, resultTy);
762 }
763
764 llvm_unreachable("Fields not found in parent record's definition");
765}
766
767// This is used in debug builds only for now because some downstream users
768// may hit this assert in their subsequent merges.
769// There are still places in the analyzer where equal bitwidth Locs
770// are compared, and need to be found and corrected. Recent previous fixes have
771// addressed the known problems of making NULLs with specific bitwidths
772// for Loc comparisons along with deprecation of APIs for the same purpose.
773//
774static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
775 Loc LhsLoc) {
776 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
777 ASTContext &Ctx = State->getStateManager().getContext();
778 uint64_t RhsBitwidth =
779 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx));
780 uint64_t LhsBitwidth =
781 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx));
782 if (RhsBitwidth && LhsBitwidth && (LhsLoc.getKind() == RhsLoc.getKind())) {
783 assert(RhsBitwidth == LhsBitwidth &&
784 "RhsLoc and LhsLoc bitwidth must be same!");
785 }
786}
787
788// FIXME: all this logic will change if/when we have MemRegion::getLocation().
789SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
791 Loc lhs, Loc rhs,
792 QualType resultTy) {
793
794 // Assert that bitwidth of lhs and rhs are the same.
795 // This can happen if two different address spaces are used,
796 // and the bitwidths of the address spaces are different.
797 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
798 // FIXME: See comment above in the function assertEqualBitWidths
799 assertEqualBitWidths(state, rhs, lhs);
800
801 // Only comparisons and subtractions are valid operations on two pointers.
802 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
803 // However, if a pointer is casted to an integer, evalBinOpNN may end up
804 // calling this function with another operation (PR7527). We don't attempt to
805 // model this for now, but it could be useful, particularly when the
806 // "location" is actually an integer value that's been passed through a void*.
807 if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
808 return UnknownVal();
809
810 // Special cases for when both sides are identical.
811 if (lhs == rhs) {
812 switch (op) {
813 default:
814 llvm_unreachable("Unimplemented operation for two identical values");
815 case BO_Sub:
816 return makeZeroVal(resultTy);
817 case BO_EQ:
818 case BO_LE:
819 case BO_GE:
820 return makeTruthVal(true, resultTy);
821 case BO_NE:
822 case BO_LT:
823 case BO_GT:
824 return makeTruthVal(false, resultTy);
825 }
826 }
827
828 switch (lhs.getKind()) {
829 default:
830 llvm_unreachable("Ordering not implemented for this Loc.");
831
832 case loc::GotoLabelKind:
833 // The only thing we know about labels is that they're non-null.
834 if (rhs.isZeroConstant()) {
835 switch (op) {
836 default:
837 break;
838 case BO_Sub:
839 return evalCast(lhs, resultTy, QualType{});
840 case BO_EQ:
841 case BO_LE:
842 case BO_LT:
843 return makeTruthVal(false, resultTy);
844 case BO_NE:
845 case BO_GT:
846 case BO_GE:
847 return makeTruthVal(true, resultTy);
848 }
849 }
850 // There may be two labels for the same location, and a function region may
851 // have the same address as a label at the start of the function (depending
852 // on the ABI).
853 // FIXME: we can probably do a comparison against other MemRegions, though.
854 // FIXME: is there a way to tell if two labels refer to the same location?
855 return UnknownVal();
856
857 case loc::ConcreteIntKind: {
858 auto L = lhs.castAs<loc::ConcreteInt>();
859
860 // If one of the operands is a symbol and the other is a constant,
861 // build an expression for use by the constraint manager.
862 if (SymbolRef rSym = rhs.getAsLocSymbol()) {
863 // We can only build expressions with symbols on the left,
864 // so we need a reversible operator.
865 if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
866 return UnknownVal();
867
869 return makeNonLoc(rSym, op, L.getValue(), resultTy);
870 }
871
872 // If both operands are constants, just perform the operation.
873 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
874 assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
875
876 if (const auto *ResultInt =
877 BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue()))
878 return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{});
879 return UnknownVal();
880 }
881
882 // Special case comparisons against NULL.
883 // This must come after the test if the RHS is a symbol, which is used to
884 // build constraints. The address of any non-symbolic region is guaranteed
885 // to be non-NULL, as is any label.
886 assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
887 if (lhs.isZeroConstant()) {
888 switch (op) {
889 default:
890 break;
891 case BO_EQ:
892 case BO_GT:
893 case BO_GE:
894 return makeTruthVal(false, resultTy);
895 case BO_NE:
896 case BO_LT:
897 case BO_LE:
898 return makeTruthVal(true, resultTy);
899 }
900 }
901
902 // Comparing an arbitrary integer to a region or label address is
903 // completely unknowable.
904 return UnknownVal();
905 }
906 case loc::MemRegionValKind: {
907 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
908 // If one of the operands is a symbol and the other is a constant,
909 // build an expression for use by the constraint manager.
910 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
912 return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
913 return UnknownVal();
914 }
915 // Special case comparisons to NULL.
916 // This must come after the test if the LHS is a symbol, which is used to
917 // build constraints. The address of any non-symbolic region is guaranteed
918 // to be non-NULL.
919 if (rInt->isZeroConstant()) {
920 if (op == BO_Sub)
921 return evalCast(lhs, resultTy, QualType{});
922
924 QualType boolType = getContext().BoolTy;
925 NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
926 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
927 return evalBinOpNN(state, op, l, r, resultTy);
928 }
929 }
930
931 // Comparing a region to an arbitrary integer is completely unknowable.
932 return UnknownVal();
933 }
934
935 // Get both values as regions, if possible.
936 const MemRegion *LeftMR = lhs.getAsRegion();
937 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
938
939 const MemRegion *RightMR = rhs.getAsRegion();
940 if (!RightMR)
941 // The RHS is probably a label, which in theory could address a region.
942 // FIXME: we can probably make a more useful statement about non-code
943 // regions, though.
944 return UnknownVal();
945
946 const MemRegion *LeftBase = LeftMR->getBaseRegion();
947 const MemRegion *RightBase = RightMR->getBaseRegion();
948 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
949 const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
950 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
951
952 // If the two regions are from different known memory spaces they cannot be
953 // equal. Also, assume that no symbolic region (whose memory space is
954 // unknown) is on the stack.
955 if (LeftMS != RightMS &&
956 ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
957 (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
958 switch (op) {
959 default:
960 return UnknownVal();
961 case BO_EQ:
962 return makeTruthVal(false, resultTy);
963 case BO_NE:
964 return makeTruthVal(true, resultTy);
965 }
966 }
967
968 // If both values wrap regions, see if they're from different base regions.
969 // Note, heap base symbolic regions are assumed to not alias with
970 // each other; for example, we assume that malloc returns different address
971 // on each invocation.
972 // FIXME: ObjC object pointers always reside on the heap, but currently
973 // we treat their memory space as unknown, because symbolic pointers
974 // to ObjC objects may alias. There should be a way to construct
975 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
976 // guesses memory space for ObjC object pointers manually instead of
977 // relying on us.
978 if (LeftBase != RightBase &&
979 ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
980 (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
981 switch (op) {
982 default:
983 return UnknownVal();
984 case BO_EQ:
985 return makeTruthVal(false, resultTy);
986 case BO_NE:
987 return makeTruthVal(true, resultTy);
988 }
989 }
990
991 // Handle special cases for when both regions are element regions.
992 const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
993 const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
994 if (RightER && LeftER) {
995 // Next, see if the two ERs have the same super-region and matching types.
996 // FIXME: This should do something useful even if the types don't match,
997 // though if both indexes are constant the RegionRawOffset path will
998 // give the correct answer.
999 if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
1000 LeftER->getElementType() == RightER->getElementType()) {
1001 // Get the left index and cast it to the correct type.
1002 // If the index is unknown or undefined, bail out here.
1003 SVal LeftIndexVal = LeftER->getIndex();
1004 std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
1005 if (!LeftIndex)
1006 return UnknownVal();
1007 LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
1008 LeftIndex = LeftIndexVal.getAs<NonLoc>();
1009 if (!LeftIndex)
1010 return UnknownVal();
1011
1012 // Do the same for the right index.
1013 SVal RightIndexVal = RightER->getIndex();
1014 std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
1015 if (!RightIndex)
1016 return UnknownVal();
1017 RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
1018 RightIndex = RightIndexVal.getAs<NonLoc>();
1019 if (!RightIndex)
1020 return UnknownVal();
1021
1022 // Actually perform the operation.
1023 // evalBinOpNN expects the two indexes to already be the right type.
1024 return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
1025 }
1026 }
1027
1028 // Special handling of the FieldRegions, even with symbolic offsets.
1029 const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
1030 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
1031 if (RightFR && LeftFR) {
1032 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1033 *this);
1034 if (!R.isUnknown())
1035 return R;
1036 }
1037
1038 // Compare the regions using the raw offsets.
1039 RegionOffset LeftOffset = LeftMR->getAsOffset();
1040 RegionOffset RightOffset = RightMR->getAsOffset();
1041
1042 if (LeftOffset.getRegion() != nullptr &&
1043 LeftOffset.getRegion() == RightOffset.getRegion() &&
1044 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1045 int64_t left = LeftOffset.getOffset();
1046 int64_t right = RightOffset.getOffset();
1047
1048 switch (op) {
1049 default:
1050 return UnknownVal();
1051 case BO_LT:
1052 return makeTruthVal(left < right, resultTy);
1053 case BO_GT:
1054 return makeTruthVal(left > right, resultTy);
1055 case BO_LE:
1056 return makeTruthVal(left <= right, resultTy);
1057 case BO_GE:
1058 return makeTruthVal(left >= right, resultTy);
1059 case BO_EQ:
1060 return makeTruthVal(left == right, resultTy);
1061 case BO_NE:
1062 return makeTruthVal(left != right, resultTy);
1063 }
1064 }
1065
1066 // At this point we're not going to get a good answer, but we can try
1067 // conjuring an expression instead.
1068 SymbolRef LHSSym = lhs.getAsLocSymbol();
1069 SymbolRef RHSSym = rhs.getAsLocSymbol();
1070 if (LHSSym && RHSSym)
1071 return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1072
1073 // If we get here, we have no way of comparing the regions.
1074 return UnknownVal();
1075 }
1076 }
1077}
1078
1079SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1081 NonLoc rhs, QualType resultTy) {
1082 if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1083 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1084 if (PTMSV->isNullMemberPointer())
1085 return UndefinedVal();
1086
1087 auto getFieldLValue = [&](const auto *FD) -> SVal {
1088 SVal Result = lhs;
1089
1090 for (const auto &I : *PTMSV)
1091 Result = StateMgr.getStoreManager().evalDerivedToBase(
1092 Result, I->getType(), I->isVirtual());
1093
1094 return state->getLValue(FD, Result);
1095 };
1096
1097 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1098 return getFieldLValue(FD);
1099 }
1100 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1101 return getFieldLValue(FD);
1102 }
1103 }
1104
1105 return rhs;
1106 }
1107
1108 assert(!BinaryOperator::isComparisonOp(op) &&
1109 "arguments to comparison ops must be of the same type");
1110
1111 // Special case: rhs is a zero constant.
1112 if (rhs.isZeroConstant())
1113 return lhs;
1114
1115 // Perserve the null pointer so that it can be found by the DerefChecker.
1116 if (lhs.isZeroConstant())
1117 return lhs;
1118
1119 // We are dealing with pointer arithmetic.
1120
1121 // Handle pointer arithmetic on constant values.
1122 if (std::optional<nonloc::ConcreteInt> rhsInt =
1123 rhs.getAs<nonloc::ConcreteInt>()) {
1124 if (std::optional<loc::ConcreteInt> lhsInt =
1125 lhs.getAs<loc::ConcreteInt>()) {
1126 const llvm::APSInt &leftI = lhsInt->getValue();
1127 assert(leftI.isUnsigned());
1128 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1129
1130 // Convert the bitwidth of rightI. This should deal with overflow
1131 // since we are dealing with concrete values.
1132 rightI = rightI.extOrTrunc(leftI.getBitWidth());
1133
1134 // Offset the increment by the pointer size.
1135 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1136 QualType pointeeType = resultTy->getPointeeType();
1137 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1138 rightI *= Multiplicand;
1139
1140 // Compute the adjusted pointer.
1141 switch (op) {
1142 case BO_Add:
1143 rightI = leftI + rightI;
1144 break;
1145 case BO_Sub:
1146 rightI = leftI - rightI;
1147 break;
1148 default:
1149 llvm_unreachable("Invalid pointer arithmetic operation");
1150 }
1151 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1152 }
1153 }
1154
1155 // Handle cases where 'lhs' is a region.
1156 if (const MemRegion *region = lhs.getAsRegion()) {
1157 rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1158 SVal index = UnknownVal();
1159 const SubRegion *superR = nullptr;
1160 // We need to know the type of the pointer in order to add an integer to it.
1161 // Depending on the type, different amount of bytes is added.
1162 QualType elementType;
1163
1164 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1165 assert(op == BO_Add || op == BO_Sub);
1166 index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1167 getArrayIndexType());
1168 superR = cast<SubRegion>(elemReg->getSuperRegion());
1169 elementType = elemReg->getElementType();
1170 }
1171 else if (isa<SubRegion>(region)) {
1172 assert(op == BO_Add || op == BO_Sub);
1173 index = (op == BO_Add) ? rhs : evalMinus(rhs);
1174 superR = cast<SubRegion>(region);
1175 // TODO: Is this actually reliable? Maybe improving our MemRegion
1176 // hierarchy to provide typed regions for all non-void pointers would be
1177 // better. For instance, we cannot extend this towards LocAsInteger
1178 // operations, where result type of the expression is integer.
1179 if (resultTy->isAnyPointerType())
1180 elementType = resultTy->getPointeeType();
1181 }
1182
1183 // Represent arithmetic on void pointers as arithmetic on char pointers.
1184 // It is fine when a TypedValueRegion of char value type represents
1185 // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1186 if (elementType->isVoidType())
1187 elementType = getContext().CharTy;
1188
1189 if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1190 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1191 superR, getContext()));
1192 }
1193 }
1194 return UnknownVal();
1195}
1196
1197const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1198 SVal V) {
1199 if (const llvm::APSInt *Res = getConcreteValue(V))
1200 return Res;
1201
1202 if (SymbolRef Sym = V.getAsSymbol())
1203 return state->getConstraintManager().getSymVal(state, Sym);
1204
1205 return nullptr;
1206}
1207
1208const llvm::APSInt *SimpleSValBuilder::getConcreteValue(SVal V) {
1209 if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1210 return &X->getValue();
1211
1212 if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1213 return &X->getValue();
1214
1215 return nullptr;
1216}
1217
1218const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1219 SVal V) {
1220 return getConstValue(state, simplifySVal(state, V));
1221}
1222
1223const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state,
1224 SVal V) {
1225 V = simplifySVal(state, V);
1226
1227 if (const llvm::APSInt *Res = getConcreteValue(V))
1228 return Res;
1229
1230 if (SymbolRef Sym = V.getAsSymbol())
1231 return state->getConstraintManager().getSymMinVal(state, Sym);
1232
1233 return nullptr;
1234}
1235
1236const llvm::APSInt *SimpleSValBuilder::getMaxValue(ProgramStateRef state,
1237 SVal V) {
1238 V = simplifySVal(state, V);
1239
1240 if (const llvm::APSInt *Res = getConcreteValue(V))
1241 return Res;
1242
1243 if (SymbolRef Sym = V.getAsSymbol())
1244 return state->getConstraintManager().getSymMaxVal(state, Sym);
1245
1246 return nullptr;
1247}
1248
1249SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1250 SVal SimplifiedVal = simplifySValOnce(State, Val);
1251 while (SimplifiedVal != Val) {
1252 Val = SimplifiedVal;
1253 SimplifiedVal = simplifySValOnce(State, Val);
1254 }
1255 return SimplifiedVal;
1256}
1257
1258SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1259 return simplifyUntilFixpoint(State, V);
1260}
1261
1262SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1263 // For now, this function tries to constant-fold symbols inside a
1264 // nonloc::SymbolVal, and does nothing else. More simplifications should
1265 // be possible, such as constant-folding an index in an ElementRegion.
1266
1267 class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1268 ProgramStateRef State;
1269 SValBuilder &SVB;
1270
1271 // Cache results for the lifetime of the Simplifier. Results change every
1272 // time new constraints are added to the program state, which is the whole
1273 // point of simplifying, and for that very reason it's pointless to maintain
1274 // the same cache for the duration of the whole analysis.
1275 llvm::DenseMap<SymbolRef, SVal> Cached;
1276
1277 static bool isUnchanged(SymbolRef Sym, SVal Val) {
1278 return Sym == Val.getAsSymbol();
1279 }
1280
1281 SVal cache(SymbolRef Sym, SVal V) {
1282 Cached[Sym] = V;
1283 return V;
1284 }
1285
1286 SVal skip(SymbolRef Sym) {
1287 return cache(Sym, SVB.makeSymbolVal(Sym));
1288 }
1289
1290 // Return the known const value for the Sym if available, or return Undef
1291 // otherwise.
1292 SVal getConst(SymbolRef Sym) {
1293 const llvm::APSInt *Const =
1294 State->getConstraintManager().getSymVal(State, Sym);
1295 if (Const)
1296 return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1297 : (SVal)SVB.makeIntVal(*Const);
1298 return UndefinedVal();
1299 }
1300
1301 SVal getConstOrVisit(SymbolRef Sym) {
1302 const SVal Ret = getConst(Sym);
1303 if (Ret.isUndef())
1304 return Visit(Sym);
1305 return Ret;
1306 }
1307
1308 public:
1309 Simplifier(ProgramStateRef State)
1310 : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1311
1312 SVal VisitSymbolData(const SymbolData *S) {
1313 // No cache here.
1314 if (const llvm::APSInt *I =
1315 State->getConstraintManager().getSymVal(State, S))
1316 return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1317 : (SVal)SVB.makeIntVal(*I);
1318 return SVB.makeSymbolVal(S);
1319 }
1320
1321 SVal VisitSymIntExpr(const SymIntExpr *S) {
1322 auto I = Cached.find(S);
1323 if (I != Cached.end())
1324 return I->second;
1325
1326 SVal LHS = getConstOrVisit(S->getLHS());
1327 if (isUnchanged(S->getLHS(), LHS))
1328 return skip(S);
1329
1330 SVal RHS;
1331 // By looking at the APSInt in the right-hand side of S, we cannot
1332 // figure out if it should be treated as a Loc or as a NonLoc.
1333 // So make our guess by recalling that we cannot multiply pointers
1334 // or compare a pointer to an integer.
1335 if (Loc::isLocType(S->getLHS()->getType()) &&
1336 BinaryOperator::isComparisonOp(S->getOpcode())) {
1337 // The usual conversion of $sym to &SymRegion{$sym}, as they have
1338 // the same meaning for Loc-type symbols, but the latter form
1339 // is preferred in SVal computations for being Loc itself.
1340 if (SymbolRef Sym = LHS.getAsSymbol()) {
1341 assert(Loc::isLocType(Sym->getType()));
1342 LHS = SVB.makeLoc(Sym);
1343 }
1344 RHS = SVB.makeIntLocVal(S->getRHS());
1345 } else {
1346 RHS = SVB.makeIntVal(S->getRHS());
1347 }
1348
1349 return cache(
1350 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1351 }
1352
1353 SVal VisitIntSymExpr(const IntSymExpr *S) {
1354 auto I = Cached.find(S);
1355 if (I != Cached.end())
1356 return I->second;
1357
1358 SVal RHS = getConstOrVisit(S->getRHS());
1359 if (isUnchanged(S->getRHS(), RHS))
1360 return skip(S);
1361
1362 SVal LHS = SVB.makeIntVal(S->getLHS());
1363 return cache(
1364 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1365 }
1366
1367 SVal VisitSymSymExpr(const SymSymExpr *S) {
1368 auto I = Cached.find(S);
1369 if (I != Cached.end())
1370 return I->second;
1371
1372 // For now don't try to simplify mixed Loc/NonLoc expressions
1373 // because they often appear from LocAsInteger operations
1374 // and we don't know how to combine a LocAsInteger
1375 // with a concrete value.
1376 if (Loc::isLocType(S->getLHS()->getType()) !=
1377 Loc::isLocType(S->getRHS()->getType()))
1378 return skip(S);
1379
1380 SVal LHS = getConstOrVisit(S->getLHS());
1381 SVal RHS = getConstOrVisit(S->getRHS());
1382
1383 if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1384 return skip(S);
1385
1386 return cache(
1387 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1388 }
1389
1390 SVal VisitSymbolCast(const SymbolCast *S) {
1391 auto I = Cached.find(S);
1392 if (I != Cached.end())
1393 return I->second;
1394 const SymExpr *OpSym = S->getOperand();
1395 SVal OpVal = getConstOrVisit(OpSym);
1396 if (isUnchanged(OpSym, OpVal))
1397 return skip(S);
1398
1399 return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType()));
1400 }
1401
1402 SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1403 auto I = Cached.find(S);
1404 if (I != Cached.end())
1405 return I->second;
1406 SVal Op = getConstOrVisit(S->getOperand());
1407 if (isUnchanged(S->getOperand(), Op))
1408 return skip(S);
1409
1410 return cache(
1411 S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType()));
1412 }
1413
1415
1416 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1417
1418 SVal VisitSymbolVal(nonloc::SymbolVal V) {
1419 // Simplification is much more costly than computing complexity.
1420 // For high complexity, it may be not worth it.
1421 return Visit(V.getSymbol());
1422 }
1423
1424 SVal VisitSVal(SVal V) { return V; }
1425 };
1426
1427 SVal SimplifiedV = Simplifier(State).Visit(V);
1428 return SimplifiedV;
1429}
#define V(N, I)
Definition: ASTContext.h:3341
static std::optional< int64_t > getConcreteValue(NonLoc SV)
#define X(type, name)
Definition: Value.h:143
static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, llvm::APSInt Bound, ProgramStateRef State)
static NonLoc doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef LSym, llvm::APSInt LInt, SymbolRef RSym, llvm::APSInt RInt)
static std::pair< SymbolRef, llvm::APSInt > decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV)
static bool isNegationValuePreserving(const llvm::APSInt &Value, APSIntType ResultType)
static std::optional< NonLoc > tryRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, NonLoc Lhs, NonLoc Rhs, QualType ResultTy)
static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef Sym, llvm::APSInt Int, QualType Ty)
static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, const FieldRegion *RightFR, BinaryOperator::Opcode op, QualType resultTy, SimpleSValBuilder &SVB)
static bool isWithinConstantOverflowBounds(SymbolRef Sym, ProgramStateRef State)
static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, Loc LhsLoc)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
CanQualType VoidPtrTy
Definition: ASTContext.h:1146
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2394
bool isComparisonOp() const
Definition: Expr.h:3961
static Opcode negateComparisonOp(Opcode Opc)
Definition: Expr.h:3966
static Opcode reverseComparisonOp(Opcode Opc)
Definition: Expr.h:3979
bool isShiftOp() const
Definition: Expr.h:3949
bool isAdditiveOp() const
Definition: Expr.h:3947
Represents a member of a struct/union/class.
Definition: Decl.h:3030
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition: Decl.h:3247
Represents a field injected from an anonymous union/struct into the parent scope.
Definition: Decl.h:3318
A (possibly-)qualified type.
Definition: Type.h:941
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1008
Represents a struct/union/class.
Definition: Decl.h:4145
field_range fields() const
Definition: Decl.h:4351
bool isVoidType() const
Definition: Type.h:8319
bool isBooleanType() const
Definition: Type.h:8447
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2167
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8434
bool isAnyPointerType() const
Definition: Type.h:8011
A record of the "type" of an APSInt, used for conversions.
Definition: APSIntType.h:19
bool isUnsigned() const
Definition: APSIntType.h:31
uint32_t getBitWidth() const
Definition: APSIntType.h:30
llvm::APSInt getMaxValue() const LLVM_READONLY
Returns the maximum value for this type.
Definition: APSIntType.h:65
void apply(llvm::APSInt &Value) const
Convert a given APSInt, in place, to match this type.
Definition: APSIntType.h:37
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Definition: APSIntType.h:48
llvm::APSInt getValue(uint64_t RawValue) const LLVM_READONLY
Definition: APSIntType.h:69
APSIntType getAPSIntType(QualType T) const
Returns the type of the APSInt used to store values of the given QualType.
Template implementation for all binary symbolic expressions.
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1199
QualType getElementType() const
Definition: MemRegion.h:1223
NonLoc getIndex() const
Definition: MemRegion.h:1219
LLVM_ATTRIBUTE_RETURNS_NONNULL const FieldDecl * getDecl() const override
Definition: MemRegion.h:1125
FullSValVisitor - a convenient mixed visitor for all three: SVal, SymExpr and MemRegion subclasses.
Definition: SValVisitor.h:130
static bool isLocType(QualType T)
Definition: SVals.h:259
RetTy VisitMemRegion(const MemRegion *R)
Definition: SValVisitor.h:120
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const
Definition: MemRegion.cpp:1328
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
Definition: MemRegion.cpp:1662
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1354
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
Definition: MemRegion.h:208
Represent a region's offset within the top level base region.
Definition: MemRegion.h:64
bool hasSymbolicOffset() const
Definition: MemRegion.h:82
const MemRegion * getRegion() const
It might return null.
Definition: MemRegion.h:80
int64_t getOffset() const
Definition: MemRegion.h:84
virtual const llvm::APSInt * getKnownValue(ProgramStateRef state, SVal val)=0
Evaluates a given SVal.
BasicValueFactory & getBasicValueFactory()
Definition: SValBuilder.h:161
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
Definition: SValBuilder.h:290
loc::MemRegionVal makeLoc(SymbolRef sym)
Definition: SValBuilder.h:377
virtual const llvm::APSInt * getMinValue(ProgramStateRef state, SVal val)=0
Tries to get the minimal possible (integer) value of a given SVal.
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
DefinedSVal makeSymbolVal(SymbolRef Sym)
Make an SVal that represents the given symbol.
Definition: SValBuilder.h:400
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
const AnalyzerOptions & getAnalyzerOptions() const
Definition: SValBuilder.h:170
virtual SVal simplifySVal(ProgramStateRef State, SVal Val)=0
Simplify symbolic expressions within a given SVal.
QualType getConditionType() const
Definition: SValBuilder.h:153
SVal evalUnaryOp(ProgramStateRef state, UnaryOperator::Opcode opc, SVal operand, QualType type)
virtual const llvm::APSInt * getMaxValue(ProgramStateRef state, SVal val)=0
Tries to get the maximal possible (integer) value of a given SVal.
SymbolManager & getSymbolManager()
Definition: SValBuilder.h:164
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
loc::ConcreteInt makeIntLocVal(const llvm::APSInt &integer)
Definition: SValBuilder.h:306
RetTy VisitSVal(SVal V)
Definition: SValVisitor.h:61
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
bool isZeroConstant() const
Definition: SVals.cpp:258
SValKind getKind() const
Definition: SVals.h:90
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
Definition: SVals.cpp:104
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition: SVals.h:86
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
Definition: SVals.cpp:181
SymbolRef getAsLocSymbol(bool IncludeBaseRegions=false) const
If this SVal is a location and wraps a symbol, return that SymbolRef.
Definition: SVals.cpp:68
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:82
bool isUnknown() const
Definition: SVals.h:102
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:446
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition: MemRegion.h:459
RetTy VisitSymExpr(SymbolRef S)
Definition: SValVisitor.h:89
Symbolic value.
Definition: SymExpr.h:30
virtual QualType getType() const =0
Represents a cast expression.
A symbol representing data which can be stored in a memory location (region).
Definition: SymExpr.h:119
const SymIntExpr * getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const llvm::APSInt &rhs, QualType t)
const SymSymExpr * getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t)
Represents a symbolic expression involving a unary operator.
Value representing integer constant.
Definition: SVals.h:297
Value representing pointer-to-member.
Definition: SVals.h:428
const PTMDataType getPTMData() const
Definition: SVals.h:435
Represents symbolic expression that isn't a location.
Definition: SVals.h:276
SValBuilder * createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr)
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:276
bool Const(InterpState &S, CodePtr OpPC, const T &Arg)
Definition: Interp.h:1176
The JSON file list parser is used to communicate input to InstallAPI.
BinaryOperatorKind
const FunctionProtoType * T
unsigned long uint64_t
long int64_t