// nyx-scanner 0.5.0

use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_sink};
use crate::cfg::{EdgeKind, StmtKind};
use crate::patterns::Severity;
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;

/// Decides whether `name` looks like a variable or field that holds an
/// error value, for the cfg-error-fallthrough rule.
///
/// A plain "contains `err`" test is too loose: it also hits method
/// names such as Java's `logger.isErrorEnabled()`, whose camelCase
/// identifier happens to embed `err`.  This predicate is deliberately
/// strict instead.
///
/// Accepted (ASCII case-insensitive):
/// * exactly `err` or `error`;
/// * snake-case names starting with `err_` / `error_`;
/// * snake-case names ending with `_err` / `_error`.
///
/// CamelCase names (`isErrorEnabled`, `getError`, `errorMsg`) are
/// rejected.  That costs the occasional false negative on Java-style
/// error fields, which is an accepted trade-off for precision.
fn is_error_var_ident(name: &str) -> bool {
    const PREFIXES: [&str; 2] = ["err_", "error_"];
    const SUFFIXES: [&str; 2] = ["_err", "_error"];

    // Fold case once so every comparison below is case-insensitive.
    let folded = name.to_ascii_lowercase();
    let ident = folded.as_str();

    matches!(ident, "err" | "error")
        || PREFIXES.iter().any(|prefix| ident.starts_with(*prefix))
        || SUFFIXES.iter().any(|suffix| ident.ends_with(*suffix))
}

/// Reports whether `text` contains a logical-not (`!`, as opposed to
/// `!=` / `!==`) applied directly to an identifier or dotted member
/// chain that has "err" somewhere in it (ASCII case-insensitive).
///
/// The error-fallthrough rule uses this to recognise happy-path guards
/// such as `if (!data.error && Array.isArray(results))`: there the TRUE
/// branch is the success path and is not expected to return.  The
/// rule's real target is the positive shape
/// `if (err) { warn(); } sink_after()`, where the error branch forgets
/// to return early.
///
/// The scan is purely textual.  After a `!`, spaces/tabs and at most
/// one opening `(` are skipped, and the run of `[A-Za-z0-9_.$]` bytes
/// that follows is taken as the chain.  Only that chain is searched,
/// so `!(a && err)` is not reported.
fn contains_negated_err_identifier(text: &str) -> bool {
    /// Horizontal whitespace tolerated between `!` / `(` and the chain.
    fn is_blank(b: u8) -> bool {
        b == b' ' || b == b'\t'
    }

    /// Bytes that may appear in an identifier or member chain.
    fn is_chain_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'$')
    }

    /// Index of the first byte at or after `from` that `keep` rejects,
    /// or `src.len()` when every remaining byte is kept.
    fn skip_while(src: &[u8], from: usize, keep: fn(u8) -> bool) -> usize {
        src[from..]
            .iter()
            .position(|&b| !keep(b))
            .map_or(src.len(), |offset| from + offset)
    }

    let src = text.as_bytes();
    let mut pos = 0;

    while let Some(offset) = src[pos..].iter().position(|&b| b == b'!') {
        let bang = pos + offset;

        // `!=` / `!==` are comparisons, not negations: step past the
        // `!` and keep looking.
        if src.get(bang + 1) == Some(&b'=') {
            pos = bang + 1;
            continue;
        }

        // Tolerate `! ident` and a single wrapping paren, `!( ident`.
        let mut chain_start = skip_while(src, bang + 1, is_blank);
        if src.get(chain_start) == Some(&b'(') {
            chain_start = skip_while(src, chain_start + 1, is_blank);
        }

        let chain_end = skip_while(src, chain_start, is_chain_byte);
        let chain = &src[chain_start..chain_end];

        // Case-insensitive substring search without allocating a
        // lowercase copy; chains shorter than three bytes yield no
        // windows and therefore never match.
        if chain.windows(3).any(|w| w.eq_ignore_ascii_case(b"err")) {
            return true;
        }

        // Resume after whatever was consumed; this is always past `bang`.
        pos = chain_end;
    }

    false
}

/// CFG analysis pass registered under the name
/// `"incomplete-error-handling"`.
///
/// Its `CfgAnalysis::run` implementation emits `cfg-error-fallthrough`
/// findings for `If` nodes that positively test an error-like variable,
/// whose TRUE branch does not terminate, and after which a sink is
/// still reachable.  The type carries no state.
pub struct IncompleteErrorHandling;

/// Reports whether the TRUE branch of the `If` node `if_node` always
/// ends in a terminator.
///
/// Each target of a `True` edge leaving `if_node` is handed to
/// [`terminates_on_all_paths`] (which treats Return, Throw, Break and
/// Continue nodes as terminators); the answer is `true` as soon as one
/// of them qualifies.  An `If` node without any `True` edge yields
/// `false`.
fn branch_terminates(cfg: &crate::cfg::Cfg, if_node: NodeIndex) -> bool {
    cfg.edges(if_node)
        .filter(|edge| matches!(edge.weight(), EdgeKind::True))
        .any(|edge| terminates_on_all_paths(cfg, edge.target(), if_node))
}

/// Reports whether every path starting at `node` hits a Return, Throw,
/// Break or Continue node before running into a node with no
/// successors.
///
/// The walk is a depth-first search over outgoing edges:
/// * a terminator node ends its path successfully and is not expanded;
/// * a node with no outgoing neighbours is a dead end and makes the
///   whole answer `false`;
/// * a successor reached through a `Back` edge is not followed, so
///   loops are not re-entered.  A node whose only successors are back
///   edges therefore contributes nothing further and is *not* counted
///   as a dead end.
///
/// `_scope_entry` is accepted for the caller's convenience but is not
/// read: the search is not bounded to the branch it started in and
/// simply follows whatever successors the CFG provides.
/// NOTE(review): if the CFG links the branch body on to the statements
/// after the `if`, a `return` much later in the function would count as
/// terminating this branch — confirm against the CFG builder.
fn terminates_on_all_paths(
    cfg: &crate::cfg::Cfg,
    node: NodeIndex,
    _scope_entry: NodeIndex,
) -> bool {
    use std::collections::HashSet;

    let mut seen = HashSet::new();
    let mut pending = vec![node];

    while let Some(here) = pending.pop() {
        if seen.contains(&here) {
            continue;
        }
        seen.insert(here);

        // A terminator closes this path; nothing beyond it matters.
        if matches!(
            cfg[here].kind,
            StmtKind::Return | StmtKind::Throw | StmtKind::Break | StmtKind::Continue
        ) {
            continue;
        }

        let mut has_successor = false;
        for next in cfg.neighbors(here) {
            has_successor = true;

            // Skip `next` entirely when any edge here -> next is a back
            // edge, so the search never loops.
            let via_back_edge = cfg
                .edges(here)
                .any(|edge| edge.target() == next && matches!(edge.weight(), EdgeKind::Back));
            if !via_back_edge {
                pending.push(next);
            }
        }

        // Ran off the end of the graph without meeting a terminator.
        if !has_successor {
            return false;
        }
    }

    true
}

/// Collects the interesting nodes reachable after the `If` node
/// `if_node` without entering its TRUE body.
///
/// The search is seeded from the targets of the `False` and `Seq`
/// edges leaving `if_node`; `True` edges are never used as seeds, so
/// sinks inside the error-handling body are not counted as "post-if"
/// fallthrough sinks.  (`Seq` is included because, per the original
/// author's note, some CFG shapes omit the explicit `False` edge for
/// one-branch ifs.)
///
/// From the seeds, every outgoing edge except `Back` (loops) and
/// `Exception` edges is followed.  A visited node is recorded when
/// `is_sink` accepts it, or when it is a `Call` node whose
/// `call.callee` is set.  Nodes are returned in visit order.
///
/// Intended effect: for `if err != nil { warn(); } sink(x)` the walk
/// reaches `sink(x)`; for the same `if` with nothing after it the walk
/// finds no sinks.
fn find_post_if_sinks(cfg: &crate::cfg::Cfg, if_node: NodeIndex) -> Vec<NodeIndex> {
    use std::collections::HashSet;

    // Seeds: the no-error continuation of the if, never its body.
    let mut worklist: Vec<NodeIndex> = cfg
        .edges(if_node)
        .filter(|edge| matches!(edge.weight(), EdgeKind::False | EdgeKind::Seq))
        .map(|edge| edge.target())
        .collect();
    let mut seen = HashSet::new();
    let mut hits = Vec::new();

    while let Some(node) = worklist.pop() {
        if !seen.insert(node) {
            continue;
        }

        let info = &cfg[node];
        let is_named_call = info.kind == StmtKind::Call && info.call.callee.is_some();
        if is_sink(info) || is_named_call {
            hits.push(node);
        }

        // Keep walking forward; loop back edges and exception edges are
        // deliberately left out.
        worklist.extend(
            cfg.edges(node)
                .filter(|edge| !matches!(edge.weight(), EdgeKind::Back | EdgeKind::Exception))
                .map(|edge| edge.target()),
        );
    }

    hits
}

impl CfgAnalysis for IncompleteErrorHandling {
    /// Stable identifier of this analysis pass.
    fn name(&self) -> &'static str {
        "incomplete-error-handling"
    }

    /// Scans every node of `ctx.cfg` and emits one
    /// `cfg-error-fallthrough` finding per `If` node that
    ///
    /// 1. tests an error-like variable in its condition,
    /// 2. tests it positively (no `!err`-style negation),
    /// 3. has a TRUE branch that does not terminate, and
    /// 4. is followed by at least one reachable sink.
    ///
    /// Findings are returned in node-index iteration order.
    fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
        ctx.cfg
            .node_indices()
            .filter_map(|idx| {
                let info = &ctx.cfg[idx];

                if info.kind != StmtKind::If {
                    return None;
                }

                // Gate 1: the CONDITION must mention an error variable.
                // `condition_vars` is used rather than `taint.uses`
                // because, per the original note, the latter covers the
                // whole if statement (condition + body; see
                // `cfg::literals::extract_defs_uses_extra_defs`) and
                // would misfire on
                // `if (!res.ok) { const err = await …; return … }`,
                // where only the body names `err`.  `condition_vars`
                // is populated from the condition subtree alone
                // (`extract_condition_raw`).
                let mentions_err = info.condition_vars.iter().any(|u| is_error_var_ident(u));
                if !mentions_err {
                    return None;
                }

                // Gate 2: polarity.  `if (!data.error && other)` is a
                // happy-path check whose TRUE branch is the success
                // path and need not return.  `condition_negated` only
                // covers a top-level unary `!`, so the condition text
                // is also scanned for an embedded `!…err…` (distinct
                // from `!=`).  The intended targets remain Go
                // `if err != nil { non-return }` and JS
                // `if (err) { warn(); }`.
                let negated_in_text = info
                    .condition_text
                    .as_deref()
                    .is_some_and(|text| contains_negated_err_identifier(text));
                if negated_in_text || info.condition_negated {
                    return None;
                }

                // Gate 3: an error branch that terminates is handled
                // correctly.
                if branch_terminates(ctx.cfg, idx) {
                    return None;
                }

                // Gate 4: only report when a real sink is reachable
                // after the check.
                let reaches_sink = find_post_if_sinks(ctx.cfg, idx)
                    .into_iter()
                    .any(|after| is_sink(&ctx.cfg[after]));
                if !reaches_sink {
                    return None;
                }

                Some(CfgFinding {
                    rule_id: "cfg-error-fallthrough".to_string(),
                    title: "Error check without return".to_string(),
                    severity: Severity::Medium,
                    confidence: Confidence::Medium,
                    span: info.ast.span,
                    message: "Error check does not terminate on error; \
                              execution falls through to dangerous operations"
                        .to_string(),
                    evidence: vec![idx],
                    score: None,
                })
            })
            .collect()
    }
}

/// Unit tests for the textual polarity gate
/// `contains_negated_err_identifier`.
#[cfg(test)]
mod negation_tests {
    use super::contains_negated_err_identifier;

    /// Bare `!` directly (or after whitespace) in front of an error name.
    #[test]
    fn detects_simple_negated_err() {
        assert!(contains_negated_err_identifier("!err"));
        assert!(contains_negated_err_identifier("!error"));
        assert!(contains_negated_err_identifier("! err"));
    }

    /// Negated member chains, including ones embedded in a compound
    /// condition.
    #[test]
    fn detects_negated_member_err() {
        assert!(contains_negated_err_identifier("!data.error"));
        assert!(contains_negated_err_identifier(
            "data && !data.error && Array.isArray(results)"
        ));
        assert!(contains_negated_err_identifier(
            "!response.errorMsg && response.ok"
        ));
    }

    /// The scanner peeks past a single `(` after the `!`, with optional
    /// spaces/tabs on either side of it.
    #[test]
    fn detects_parenthesised_negated_err() {
        assert!(contains_negated_err_identifier("!(err)"));
        assert!(contains_negated_err_identifier("!( data.error )"));
        assert!(contains_negated_err_identifier("ok && !(resp.error)"));
    }

    /// `!=` / `!==` are comparisons, not logical negation.
    #[test]
    fn does_not_match_inequality() {
        assert!(!contains_negated_err_identifier("err != nil"));
        assert!(!contains_negated_err_identifier("error !== null"));
    }

    /// Conditions that mention an error without negating it.
    #[test]
    fn does_not_match_positive_err_checks() {
        assert!(!contains_negated_err_identifier("err"));
        assert!(!contains_negated_err_identifier("err != null"));
        assert!(!contains_negated_err_identifier("response.error"));
        assert!(!contains_negated_err_identifier("hasError(x)"));
    }

    /// A `!` applied to something that is not error-named must not
    /// trip the gate, even when an error name appears elsewhere in the
    /// condition; degenerate inputs are handled without panicking.
    #[test]
    fn ignores_negation_of_non_error_identifiers() {
        assert!(!contains_negated_err_identifier("!ok"));
        assert!(!contains_negated_err_identifier("!done && err"));
        assert!(!contains_negated_err_identifier("!(a && err)"));
        assert!(!contains_negated_err_identifier("!"));
        assert!(!contains_negated_err_identifier(""));
    }
}

/// Unit tests for the strict error-variable predicate
/// `is_error_var_ident`.
#[cfg(test)]
mod err_ident_tests {
    use super::is_error_var_ident;

    /// Asserts that every name in `names` is accepted.
    fn assert_accepted(names: &[&str]) {
        for name in names {
            assert!(is_error_var_ident(name), "expected {:?} to be accepted", name);
        }
    }

    /// Asserts that every name in `names` is rejected.
    fn assert_rejected(names: &[&str]) {
        for name in names {
            assert!(!is_error_var_ident(name), "expected {:?} to be rejected", name);
        }
    }

    #[test]
    fn matches_canonical_error_vars() {
        assert_accepted(&["err", "error", "ERR", "Error"]);
    }

    #[test]
    fn matches_snake_case_error_vars() {
        assert_accepted(&["err_resp", "error_msg", "response_err", "parse_error"]);
    }

    #[test]
    fn rejects_camelcase_method_names() {
        // Spring's `logger.isErrorEnabled()` lifts `isErrorEnabled`
        // into `condition_vars`.  The former `lower.contains("err")`
        // check fired the rule on it; the strict check must not, since
        // the condition asks "is logging enabled", not "is there an
        // error".
        assert_rejected(&[
            "isErrorEnabled",
            "getError",
            "hasError",
            "errorMsg",
            "errCode",
        ]);
    }

    #[test]
    fn rejects_unrelated_idents() {
        assert_rejected(&["user", "merry", "perform"]);
    }
}