soroban-decompiler 0.2.3

Soroban WASM smart contract decompiler - reconstructs Rust source from compiled contracts
Documentation
//! Guard and comparison analysis helpers.
//!
//! Functions in this module classify conditional expressions generated by the
//! WASM compiler so that the pattern recognizer can distinguish user-level
//! preconditions (`if amount <= 0 { panic!() }`) from compiler-generated
//! boilerplate (type tag checks, overflow guards, decode checks).

use crate::ir::{BinOp as B, Expr, Literal, Statement, UnOp};

/// Rewrite Val-encoded constants that appear as operands of a comparison.
///
/// The WASM compiler emits fast checks such as `result < (1 << 32)` that
/// compare a U32Val-encoded value without decoding it first. When `expr` is a
/// relational or equality `BinOp` (`Lt`, `Le`, `Gt`, `Ge`, `Eq`, `Ne`), both
/// operands are passed through `decode_large_literal`, which replaces a
/// qualifying `I64` literal with its upper 32 bits.
///
/// Any other expression is returned unchanged. Only the top-level node is
/// inspected; comparisons nested inside `expr` are not visited.
pub(super) fn decode_val_comparison_constants(expr: Expr) -> Expr {
    if let Expr::BinOp {
        op: op @ (B::Lt | B::Le | B::Gt | B::Ge | B::Eq | B::Ne),
        left,
        right,
    } = expr
    {
        // Decode each side independently; operands that do not qualify come
        // back untouched.
        let left = Box::new(decode_large_literal(*left));
        let right = Box::new(decode_large_literal(*right));
        Expr::BinOp { op, left, right }
    } else {
        expr
    }
}

/// Decode an `I64` literal that looks like a Val-encoded constant.
///
/// The literal is reinterpreted as `u64` and its upper 32 bits are taken as
/// the candidate value. The literal is replaced only when that candidate lies
/// in `1..=1000`, i.e. the raw constant is at least `2^32` and decodes to a
/// small number. The low 32 bits are ignored.
///
/// Everything else is returned unchanged: constants below `2^32`, negative
/// values (their upper 32 bits are far above the limit), non-`I64` literals
/// and non-literal expressions.
fn decode_large_literal(expr: Expr) -> Expr {
    if let Expr::Literal(Literal::I64(raw)) = &expr {
        let upper = (*raw as u64) >> 32;
        // `upper >= 1` is the same as `raw >= 2^32`; the cap keeps ordinary
        // large constants from being mistaken for encoded values.
        if (1..=1000).contains(&upper) {
            return Expr::Literal(Literal::I64(upper as i64));
        }
    }
    expr
}

/// Decide whether `expr` is a user-level condition suitable for a
/// precondition guard (`if cond { panic!() }`).
///
/// Accepted shapes:
/// - a relational/equality `BinOp` (`Lt`, `Le`, `Gt`, `Ge`, `Eq`, `Ne`) where
///   at least one side mentions a named variable and the expression is not a
///   type tag check, an i128 overflow guard, or an arithmetic overflow check;
/// - any `HostCall` (e.g. `is_initialized()`);
/// - a logical negation (`UnOp::Not`) of an accepted expression;
/// - a `MethodChain` for which `has_named_var` returns true.
///
/// Everything else is rejected.
pub(super) fn is_user_comparison(expr: &Expr) -> bool {
    match expr {
        // Direct comparison: amount <= 0, val < share_amount, len > 10
        Expr::BinOp { op: B::Lt | B::Le | B::Gt | B::Ge | B::Eq | B::Ne, left, right } => {
            // A comparison between placeholders/literals only is never user code.
            if !(has_named_var(left) || has_named_var(right)) {
                return false;
            }
            // Reject the compiler-generated comparison shapes.
            !(is_type_tag_check_expr(expr)
                || is_overflow_check_expr(expr)
                || is_arithmetic_overflow_check(expr))
        }
        // Method call result used as bool guard: claimants.is_empty()
        // NOTE(review): `has_named_var` has no `MethodChain` arm, so this is
        // currently `false` for every method chain -- confirm whether chains
        // on named receivers were meant to qualify.
        Expr::MethodChain { .. } => has_named_var(expr),
        // Host call / helper result used as bool guard: is_initialized()
        Expr::HostCall { .. } => true,
        // Negated expression: !check_time_bound(...)
        Expr::UnOp { op: UnOp::Not, operand } => is_user_comparison(operand),
        _ => false,
    }
}

/// Detect the compiler-generated overflow check that follows an addition.
///
/// Matches `(a + b) < a` and `(a + b) < b`: a `Lt` whose left operand is an
/// `Add` and whose right operand is structurally identical to one of the two
/// addends. This is the shape emitted for `checked_add`. Structural identity
/// is decided by comparing the operands' `Debug` renderings.
///
/// NOTE(review): the subtraction underflow form `a < (a - b)` is not
/// recognized by this function; only the addition form above is.
fn is_arithmetic_overflow_check(expr: &Expr) -> bool {
    if let Expr::BinOp { op: B::Lt, left: sum, right: bound } = expr {
        if let Expr::BinOp { op: B::Add, left: lhs, right: rhs } = sum.as_ref() {
            // Render the comparison bound once and test it against each addend.
            let bound_repr = format!("{:?}", bound);
            return format!("{:?}", lhs) == bound_repr || format!("{:?}", rhs) == bound_repr;
        }
    }
    false
}

/// Report whether `expr` mentions at least one named variable.
///
/// A `Var` counts as named unless its name starts with `local_` or `/*`
/// (raw/computed placeholders). The search descends through `BinOp` operands,
/// `UnOp` operands and `Ref` targets; every other expression kind is treated
/// as containing no named variable.
///
/// NOTE(review): `MethodChain`, `HostCall` and similar variants are not
/// descended into, so a named receiver or argument inside them is not seen.
fn has_named_var(expr: &Expr) -> bool {
    match expr {
        Expr::Var(name) => {
            let is_placeholder = name.starts_with("local_") || name.starts_with("/*");
            !is_placeholder
        }
        Expr::BinOp { left: lhs, right: rhs, .. } => {
            if has_named_var(lhs) {
                true
            } else {
                has_named_var(rhs)
            }
        }
        Expr::UnOp { operand: inner, .. } => has_named_var(inner),
        Expr::Ref(target) => has_named_var(target),
        _ => false,
    }
}

/// Recognize a Soroban type tag check.
///
/// Matches `(x & 255) != N` and `(x & 255) == N`: an `Ne`/`Eq` comparison
/// whose left operand is a `BitAnd` with the literal 255 (`I64` or `I32`) as
/// its right operand. The compared constant `N` is not inspected. These
/// checks are generated by the SDK to validate Val type tags at runtime and
/// should be stripped during decompilation.
fn is_type_tag_check_expr(expr: &Expr) -> bool {
    // Step 1: the outer node must be an equality test; take its left side.
    let masked: &Expr = match expr {
        Expr::BinOp { op: B::Ne | B::Eq, left, .. } => left.as_ref(),
        _ => return false,
    };
    // Step 2: that side must be `<anything> & 255`.
    match masked {
        Expr::BinOp { op: B::BitAnd, right: mask, .. } => matches!(
            mask.as_ref(),
            Expr::Literal(Literal::I64(255)) | Expr::Literal(Literal::I32(255))
        ),
        _ => false,
    }
}

/// Recognize an i128 decode/conversion status check.
///
/// Matches `literal == literal` and `literal != literal` where both operands
/// are `I32` or `I64` literals with absolute value at most 32. Such
/// comparisons come from i128 decoder helpers checking a return status.
#[allow(dead_code)]
pub(super) fn is_decode_check_expr(expr: &Expr) -> bool {
    /// True for an `I32`/`I64` literal whose magnitude is at most 32.
    fn small_literal(e: &Expr) -> bool {
        matches!(e, Expr::Literal(Literal::I32(v)) if v.unsigned_abs() <= 32)
            || matches!(e, Expr::Literal(Literal::I64(v)) if v.unsigned_abs() <= 32)
    }

    if let Expr::BinOp { op: B::Eq | B::Ne, left, right } = expr {
        small_literal(left) && small_literal(right)
    } else {
        false
    }
}

/// Recognize an overflow guard produced by i128 arithmetic.
///
/// Matches `<lhs> < 0` where the right operand is the literal zero (`I64` or
/// `I32`) and `<lhs>` contains a `>> 63` shift according to `contains_shr63`.
/// A typical instance is `((a >> 63) ^ (b >> 63)) & (...) < 0`, generated by
/// the compiler for i128 subtraction overflow detection.
fn is_overflow_check_expr(expr: &Expr) -> bool {
    match expr {
        Expr::BinOp { op: B::Lt, left, right } => {
            let compares_to_zero = matches!(
                right.as_ref(),
                Expr::Literal(Literal::I64(0)) | Expr::Literal(Literal::I32(0))
            );
            // The sign-extraction shift is what separates this guard from a
            // plain `x < 0` written by the user.
            compares_to_zero && contains_shr63(left)
        }
        _ => false,
    }
}

/// Report whether `expr` contains a right shift by the literal 63 anywhere
/// in its `BinOp`/`UnOp` tree (characteristic of i128 overflow detection).
///
/// A node counts when it is a `Shr` whose right operand is the literal 63
/// (`I64` or `I32`). Both operands of every binary node are searched,
/// including the shifted operand of a `Shr` whose amount is not 63, so
/// `(x >> 63) >> 1` is reported as well. `UnOp` operands are searched too;
/// every other expression kind is treated as a leaf.
fn contains_shr63(expr: &Expr) -> bool {
    match expr {
        Expr::BinOp { op, left, right } => {
            let shifts_by_63 = matches!(op, B::Shr)
                && matches!(
                    right.as_ref(),
                    Expr::Literal(Literal::I64(63)) | Expr::Literal(Literal::I32(63))
                );
            // Search both sides for every operator. A dedicated `Shr` arm
            // that only looked at the shift amount would hide a `>> 63`
            // nested in the shifted operand.
            shifts_by_63 || contains_shr63(left) || contains_shr63(right)
        }
        Expr::UnOp { operand, .. } => contains_shr63(operand),
        _ => false,
    }
}

/// Identify the vec-length binding left over from the vec iteration pattern
/// (the `vec_len` let-binding absorbed into the for-each/for-range header).
///
/// Returns true only for a `Let` whose name is exactly `len` or starts with
/// `len_`. `item*` bindings are deliberately kept: they carry the loop
/// variable data used in the loop body.
pub(super) fn is_vec_iteration_boilerplate(stmt: &Statement) -> bool {
    matches!(
        stmt,
        Statement::Let { name, .. } if name == "len" || name.starts_with("len_")
    )
}

/// Decide whether a resolved return expression is worth emitting.
///
/// A `Raw` expression is suppressed when its text contains `void`, `unknown`
/// or `computed` anywhere (placeholders such as `/* computed */`,
/// `/* unknown */`, `/* void */`). Every other expression kind is emitted.
pub(super) fn should_emit_return_expr(expr: &Expr) -> bool {
    // Substrings that mark a raw expression as a placeholder.
    const PLACEHOLDER_MARKERS: [&str; 3] = ["void", "unknown", "computed"];

    match expr {
        Expr::Raw(text) => !PLACEHOLDER_MARKERS.iter().any(|marker| text.contains(*marker)),
        // Listed explicitly (no catch-all) so that adding an `Expr` variant
        // forces a decision here.
        Expr::Literal(_)
        | Expr::Var(_)
        | Expr::Ref(_)
        | Expr::BinOp { .. }
        | Expr::UnOp { .. }
        | Expr::MethodChain { .. }
        | Expr::HostCall { .. }
        | Expr::MacroCall { .. }
        | Expr::StructLiteral { .. }
        | Expr::EnumVariant { .. } => true,
    }
}