repotoire 0.8.0

//! Command-injection detector (CWE-78).
//!
//! Flags execution of OS commands where the command string or arguments
//! are user-controlled. Severity is decided per-call from the AST shape
//! of the argument(s), not from line-text reverse engineering.
//!
//! # Architecture
//!
//! Two scan paths, picked by file language (mirrors
//! `eval_detector.rs`, `cleartext_credentials.rs`, `secrets.rs`):
//!
//! 1. **AST path** (Python, JS, TS, JSX, TSX, Go): walks the tree-sitter
//!    parse tree looking for **call expressions** whose callee matches a
//!    known command-execution API. All other languages either take the
//!    line path (Ruby, PHP, Java, sh) or are out of scope.
//!
//!      - **Python**: `os.system`, `os.popen`/`popen2`/`popen3`/`popen4`,
//!        `subprocess.run`/`call`/`Popen`/`check_output`/`check_call`,
//!        `subprocess.getoutput`/`getstatusoutput` (implicit shell=True),
//!        `commands.getoutput`/`getstatusoutput` (Python-2 legacy),
//!        `pty.spawn`. For `subprocess.*`, the `shell=True` keyword (or
//!        the 9th positional arg of `Popen`) bumps severity. Method
//!        definitions (`def system(self): ...`) and method calls on
//!        non-matching receivers are AST-distinguished from the real
//!        callee shape and never fire.
//!
//!      - **JavaScript / TypeScript**: `child_process.exec`/`execSync`,
//!        `child_process.execFile`/`execFileSync`,
//!        `child_process.spawn`/`spawnSync`, `child_process.fork`,
//!        `shelljs.exec`, plus bare `exec` / `execSync` (destructured
//!        from `child_process`). Receiver chain walks support
//!        `this.cp.exec(...)`, `require('child_process').exec(...)`, and
//!        `(await import('child_process')).exec(...)`.
//!
//!      - **Go**: `exec.Command(name, args...)`,
//!        `exec.CommandContext(ctx, name, args...)`,
//!        `syscall.Exec`/`syscall.StartProcess`. The canonical
//!        shell-injection form `exec.Command("sh", "-c", USER_VAR)` is
//!        recognized by inspecting arg[0] (shell name) + arg[1] (`-c`
//!        / `/c`) + arg[2] (variable) and bumped to Critical.
//!
//!    Severity is decided by classifying the relevant argument(s) into
//!    a `CommandArgKind`:
//!
//!    | API                              | StaticLiteral | StaticList | Interpolated | UserVariable | MixedList (var argv[0]) | MixedList (literal argv[0]) | Unknown |
//!    | -------------------------------- | ------------- | ---------- | ------------ | ------------ | ----------------------- | --------------------------- | ------- |
//!    | `os.system` / `os.popen`         | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `subprocess.* shell=True`        | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `subprocess.* shell=False`       | Low           | Low        | High         | High         | Critical                | Low                         | High    |
//!    | `subprocess.run ["sh","-c",VAR]` | n/a           | n/a        | n/a          | Critical     | Critical                | n/a                         | n/a     |
//!    | `subprocess.getoutput` (shell)   | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `child_process.exec`             | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `child_process.execFile/spawn`   | Low           | Low        | High         | High         | Critical                | Low                         | High    |
//!    | `spawn("sh", ["-c", VAR])`       | n/a           | n/a        | n/a          | Critical     | Critical                | n/a                         | n/a     |
//!    | `exec.Command "sh" "-c" VAR`     | n/a           | n/a        | n/a          | Critical     | n/a                     | n/a                         | n/a     |
//!    | `exec.Command VAR ...`           | Low           | Low        | High         | High         | Critical                | Low                         | High    |
//!
//!    The `MixedList (literal argv[0])` column is **Low** in the
//!    `shell=False` rows because with shell=False and a literal argv[0]
//!    the OS executes argv[0] directly via `execve` and passes argv[N>0]
//!    as raw strings to the target binary's `main(argc, argv)`. There is
//!    no shell, no metacharacter interpretation, no CWE-78 vector. (The
//!    one shell-injection sub-shape that *does* exist —
//!    `subprocess.run(["sh", "-c", USER_VAR])` and the equivalent JS
//!    spawn form — is recognised separately and reclassified to its own
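//!    always-Critical API tag.)
//!
//!    Two cases are not visible in the table. Under a shell-mode API, a
//!    `StaticLiteral` that itself contains shell metacharacters (`;`,
//!    `&&`, `$(`, ...) is reported as **Medium** rather than Low (audit
//!    B15). And although the shell-mode rows mark the MixedList columns
//!    `n/a`, `severity_for` still rates such a list if one shows up:
//!    Critical for a variable argv[0], High for a literal argv[0].
//!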
//!    `Low` findings are filtered out from the final result by default
//!    (matches the legacy detector behavior; same filter as
//!    `eval_detector::detect()`). The taint pass at the end of
//!    `detect()` may upgrade an existing finding to Critical when an
//!    unsanitized user-input dataflow is confirmed.
//!
//!    A route-handler severity boost (function name contains `handler`/
//!    `route`/`endpoint`/`view`/`controller`/`middleware`/`request`/
//!    `response`, or matches `^(get|post|put|delete|patch|head|options)
//!    [A-Z]`) bumps `High`/`Medium` findings to `Critical`. This mirrors
//!    `eval_detector`'s B8 fix.
//!
//! 2. **Line path** (Ruby, PHP, Java, sh, ...): for languages without a
//!    tree-sitter grammar in our dispatch list, a small line-based
//!    regex scanner matches canonical Ruby/PHP/Java forms. This is the
//!    same concession `eval_detector::scan_file_line` makes. We accept
//!    a higher false-positive rate here as a known trade-off.
//!
//!      - **Ruby**: `system(...)`, `exec(...)`, `` `cmd #{var}` `` (backticks
//!        with interpolation). Backtick strings are detected by a
//!        line-text inspection for `` ` `` + `#{`.
//!      - **PHP**: `system($_GET[...])`, `shell_exec`, `passthru`,
//!        `proc_open`, plus `exec(...)` (the standalone PHP function).
//!      - **Java**: `Runtime.getRuntime().exec(...)`, `new ProcessBuilder(...)`.
//!
//! This is the structural counterpart of:
//!   - the cleartext-credentials AST migration (commit `4c656b2f`),
//!   - the eval-detector AST migration (commit `ac8400c6`, audit
//!     follow-up `474e6cb5`).
//!
//! The previous detector ran a line-based regex over raw text and then
//! needed bespoke filters (`def exec`, `.exec(`, `# os.system(`,
//! `RegExp.exec`, `String.prototype.exec`, ...) to suppress 5+
//! "exec/system substring is not a call" false-positive shapes. With
//! the AST, "is this token a call expression with `os.system` as its
//! callee" is a free piece of information and that filter chain
//! disappears.

mod annotation;
mod evidence;
mod predict;

use crate::detectors::ast_fingerprint::parse_root_ext;
use crate::detectors::ast_walk::AstWalkCtx;
use crate::detectors::base::Detector;
use crate::detectors::fast_search::{
    contains_any, find_in, FIND_CHILD_PROCESS, FIND_COMMANDS_GETOUTPUT,
    FIND_COMMANDS_GETSTATUSOUTPUT, FIND_EXEC_ASYNC, FIND_EXEC_COMMAND, FIND_EXEC_PAREN,
    FIND_EXEC_SYNC, FIND_GETOUTPUT_PAREN, FIND_OS_EXEC_IMPORT, FIND_OS_POPEN, FIND_OS_SYSTEM,
    FIND_PASSTHRU_PAREN, FIND_POPEN_PAREN, FIND_PROCESS_BUILDER, FIND_PROC_OPEN, FIND_PTY_SPAWN,
    FIND_RUNTIME_GETRUNTIME, FIND_SHELLJS, FIND_SHELL_EXEC, FIND_SHELL_TRUE, FIND_SHELL_TRUE_JS,
    FIND_SPAWN_SYNC, FIND_SUBPROCESS, FIND_SYSCALL_DOT, FIND_SYSTEM_PAREN,
};
use crate::detectors::security::ast_helpers::{
    collect_named_args, node_text, python_kwarg_truthy,
    receiver_chain_label as receiver_chain_label_shared, receiver_chain_label_go, unwrap_callee,
};
use crate::detectors::security::scan_inputs::{ScanAstInputs, ScanInputs};
use crate::detectors::taint::{TaintAnalysisResult, TaintAnalyzer, TaintCategory};
use crate::graph::GraphQueryExt;
use crate::models::{Finding, Severity};
use crate::parsers::lightweight::Language;
use anyhow::Result;
use regex::Regex;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;

// ---------------------------------------------------------------------------
// Single source of truth for supported file extensions.
// ---------------------------------------------------------------------------

/// Extensions this detector processes. Mirrors `cleartext_credentials.rs`
/// and `eval_detector.rs` `SUPPORTED_EXTS` so AST/line dispatch stays
/// aligned.
///
/// AST-eligible extensions flow through `scan_file_ast`; the rest fall
/// through to `scan_file_line`.
///
/// The first six entries (`py` .. `go`) are the AST-path extensions and
/// must stay in sync with `AST_EXTS`; the remaining four (`rb`, `php`,
/// `java`, `sh`) are the line-path extensions.
const SUPPORTED_EXTS: &[&str] = &[
    // AST path (the six entries on the next line)
    "py", "js", "ts", "jsx", "tsx", "go", // Line path (the entries on the following line)
    "rb", "php", "java", "sh",
];

/// AST-eligible extensions. A file with one of these extensions flows
/// through `scan_file_ast`; everything else in `SUPPORTED_EXTS` flows
/// through `scan_file_line`.
///
/// This list repeats the AST-path prefix of `SUPPORTED_EXTS`; an
/// extension added here needs to be added there as well.
const AST_EXTS: &[&str] = &["py", "js", "ts", "jsx", "tsx", "go"];

// ---------------------------------------------------------------------------
// Argument-shape classification
// ---------------------------------------------------------------------------

/// What kind of value is being passed as a command argument.
///
/// The AST-native equivalent of the legacy line-based filter chain
/// (`FIND_REQ_BODY` / `FIND_PARAMS_BRACKET` / `FIND_DOLLAR_BRACE` / ...).
/// Computed from the AST, this is both more precise (no string-tracking
/// gymnastics to know "is this a literal") and more general (handles
/// template literals, f-strings, ternaries, await, TS casts, ...).
///
/// Together with the matched [`CommandApi`], the kind is the input to
/// [`CommandApi::severity_for`], which maps the pair to a `Severity`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum CommandArgKind {
    /// String literal with no interpolation — `os.system("date")`.
    /// Rated `Low`, or `Medium` under a shell-mode API when the literal
    /// text contains shell metacharacters.
    StaticLiteral,
    /// List/array all of whose elements are static literals —
    /// `subprocess.run(["git", "status"])`. Rated `Low`.
    StaticList,
    /// String with variable interpolation — Python f-string with
    /// interpolation, JS template literal with substitution, or string
    /// concatenation that includes a non-literal operand. Rated
    /// `Critical` under shell-mode APIs and `High` otherwise.
    Interpolated,
    /// Identifier / attribute / subscript / call result —
    /// `os.system(user_input)`, `exec(req.body.cmd)`. Rated the same as
    /// `Interpolated`.
    UserVariable,
    /// List/array with at least one variable element AND a variable in
    /// position 0 — `subprocess.run([user_bin, "--flag"])`. The
    /// attacker chooses which binary runs. Rated `Critical` for every
    /// API.
    MixedListVarArgv0,
    /// List/array with at least one variable element but argv[0] is a
    /// static literal — `subprocess.run(["git", user_arg])`. Less
    /// dangerous than `MixedListVarArgv0` because the binary is fixed.
    /// Rated `Low` for non-shell APIs and `High` for shell-mode APIs.
    MixedListLiteralArgv0,
    /// Function expression / arrow / lambda. Almost never seen in
    /// command APIs but handled for completeness. Rated `Low`.
    FunctionLike,
    /// Anything we don't classify. Defaults to `High`.
    Unknown,
}

/// Which command-exec API was matched at this site.
///
/// Variants are prefixed by language (`Py*`, `Js*`, `Go*`). The
/// `*ShellC` variants denote the `sh -c <arg>` sub-shape of an
/// otherwise shell-free API; `severity_for` rates them `Critical`
/// unconditionally.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum CommandApi {
    /// `os.system` (always shell-exec).
    PyOsSystem,
    /// `os.popen` (always shell-exec).
    PyOsPopen,
    /// `subprocess.run`/`call`/`Popen`/`check_output`/`check_call` with
    /// `shell=False` (or shell unspecified).
    PySubprocessNoShell,
    /// `subprocess.run(["sh", "-c", USER_VAR])` and friends —
    /// `shell=False` but argv[0] is a shell binary and argv[1] is `-c`,
    /// so the shell still interprets argv[2] as a command line. The
    /// canonical Python shell-injection shape; mirrors
    /// `GoExecCommandShellC`.
    PySubprocessShellC,
    /// `subprocess.*` with `shell=True`.
    PySubprocessShell,
    /// `subprocess.getoutput` / `getstatusoutput` — implicit shell=True.
    PySubprocessGetOutput,
    /// `pty.spawn(cmd)`. Not treated as a shell-mode API by
    /// `is_shell_api`.
    PyPtySpawn,
    /// `commands.getoutput` / `getstatusoutput` (Python-2 legacy).
    /// Treated as a shell-mode API by `is_shell_api`.
    PyCommandsGetOutput,
    /// JS `child_process.exec` / `execSync` — always shell=True.
    JsChildProcessExec,
    /// JS `child_process.execFile` / `execFileSync` — argv[0] is the
    /// binary path; no shell.
    JsChildProcessExecFile,
    /// JS `child_process.spawn` / `spawnSync` — argv[0] is the binary
    /// path; no shell.
    JsChildProcessSpawn,
    /// JS `child_process.spawn("sh", ["-c", USER_VAR])` and friends —
    /// `shell: false` but argv[0] is a shell binary and the args list
    /// starts with `-c`, so the shell still interprets the next arg as
    /// a command line. Mirrors `GoExecCommandShellC` and
    /// `PySubprocessShellC`.
    JsSpawnShellC,
    /// JS `child_process.fork(modulePath, ...)`. Not treated as a
    /// shell-mode API by `is_shell_api`.
    JsChildProcessFork,
    /// `shelljs.exec(cmd)`. Treated as a shell-mode API by
    /// `is_shell_api`.
    JsShellJsExec,
    /// Go `exec.Command(name, args...)` with `name == "sh"` etc and
    /// `args[0] == "-c"`/`"/c"` and a variable in args[1] — the
    /// canonical shell-injection form.
    GoExecCommandShellC,
    /// Go `exec.Command(name, args...)` general form.
    GoExecCommand,
    /// Go `syscall.Exec`/`syscall.StartProcess`.
    GoSyscallExec,
}

impl CommandApi {
    pub(super) fn callee_label(self) -> &'static str {
        match self {
            CommandApi::PyOsSystem => "os.system",
            CommandApi::PyOsPopen => "os.popen",
            CommandApi::PySubprocessNoShell => "subprocess",
            CommandApi::PySubprocessShellC => "subprocess.run([\"sh\", \"-c\", ...])",
            CommandApi::PySubprocessShell => "subprocess (shell=True)",
            CommandApi::PySubprocessGetOutput => "subprocess.getoutput",
            CommandApi::PyPtySpawn => "pty.spawn",
            CommandApi::PyCommandsGetOutput => "commands.getoutput",
            CommandApi::JsChildProcessExec => "child_process.exec",
            CommandApi::JsChildProcessExecFile => "child_process.execFile",
            CommandApi::JsChildProcessSpawn => "child_process.spawn",
            CommandApi::JsSpawnShellC => "child_process.spawn(\"sh\", [\"-c\", ...])",
            CommandApi::JsChildProcessFork => "child_process.fork",
            CommandApi::JsShellJsExec => "shelljs.exec",
            CommandApi::GoExecCommandShellC => "exec.Command(\"sh\", \"-c\", ...)",
            CommandApi::GoExecCommand => "exec.Command",
            CommandApi::GoSyscallExec => "syscall.Exec",
        }
    }

    /// Is this API a Python variant? Used by the Phase 2d dual-branch
    /// emit path to gate Python-only emission per decisions D4.
    pub(super) fn is_python(self) -> bool {
        matches!(
            self,
            CommandApi::PyOsSystem
                | CommandApi::PyOsPopen
                | CommandApi::PySubprocessNoShell
                | CommandApi::PySubprocessShellC
                | CommandApi::PySubprocessShell
                | CommandApi::PySubprocessGetOutput
                | CommandApi::PyPtySpawn
                | CommandApi::PyCommandsGetOutput
        )
    }

    /// Is this API "always shell" (i.e. argv[0] is interpreted by /bin/sh)?
    fn is_shell_api(self) -> bool {
        matches!(
            self,
            CommandApi::PyOsSystem
                | CommandApi::PyOsPopen
                | CommandApi::PySubprocessShell
                | CommandApi::PySubprocessShellC
                | CommandApi::PySubprocessGetOutput
                | CommandApi::PyCommandsGetOutput
                | CommandApi::JsChildProcessExec
                | CommandApi::JsSpawnShellC
                | CommandApi::JsShellJsExec
                | CommandApi::GoExecCommandShellC
        )
    }

    /// Severity for this API + arg shape combination. See the table in
    /// the module-level doc.
    ///
    /// `literal_text` is the contents of a `StaticLiteral` argument, when
    /// available. For shell-mode APIs, a literal containing shell
    /// metacharacters (`;`, `&&`, `$(`, ...) is a real injection vector
    /// even with no variable, so we bump it to `Medium` (audit B15).
    pub(super) fn severity_for(self, kind: CommandArgKind, literal_text: Option<&str>) -> Severity {
        // The shell-c forms are their own beast: argv[0] is a shell
        // binary, argv[1] is `-c`, and the next arg is interpreted by
        // the shell as a command line. The whole point of recognising
        // these shapes is that any non-literal argv[≥2] is a
        // shell-injection sink — Critical regardless of arg kind.
        if matches!(
            self,
            CommandApi::GoExecCommandShellC
                | CommandApi::PySubprocessShellC
                | CommandApi::JsSpawnShellC
        ) {
            return Severity::Critical;
        }

        // B15: shell-mode API + static-literal command containing
        // metacharacters is a real injection vector, not a benign
        // literal. Bump from Low → Medium.
        if self.is_shell_api() && kind == CommandArgKind::StaticLiteral {
            if let Some(text) = literal_text {
                if !shell_metachars_in(text).is_empty() {
                    return Severity::Medium;
                }
            }
        }

        if self.is_shell_api() {
            match kind {
                CommandArgKind::StaticLiteral => Severity::Low,
                CommandArgKind::StaticList => Severity::Low,
                CommandArgKind::Interpolated | CommandArgKind::UserVariable => Severity::Critical,
                // For shell APIs, lists are unusual but treat as a
                // mixed bag — shell still interprets argv[0].
                CommandArgKind::MixedListVarArgv0 => Severity::Critical,
                CommandArgKind::MixedListLiteralArgv0 => Severity::High,
                CommandArgKind::FunctionLike => Severity::Low,
                CommandArgKind::Unknown => Severity::High,
            }
        } else {
            match kind {
                CommandArgKind::StaticLiteral => Severity::Low,
                CommandArgKind::StaticList => Severity::Low,
                CommandArgKind::Interpolated | CommandArgKind::UserVariable => Severity::High,
                CommandArgKind::MixedListVarArgv0 => Severity::Critical,
                // `subprocess.run(["xdg-open", url], shell=False)` and
                // friends: with `shell=False` and a literal argv[0],
                // execve(2) executes argv[0] directly and passes
                // argv[≥1] as raw strings to the target binary's
                // `main(argc, argv)`. There is no shell, no metachar
                // interpretation, so this is **not CWE-78** (command
                // injection). Argument injection into the target
                // binary (CWE-88) remains possible, hence Low rather
                // than suppressed entirely. The shell-c sub-shape (an
                // argv[0] of `sh`/`bash`/... + argv[1] of `-c`) is
                // recognised earlier and reclassified to
                // `PySubprocessShellC`/`JsSpawnShellC`, so it never
                // reaches this branch.
                CommandArgKind::MixedListLiteralArgv0 => Severity::Low,
                CommandArgKind::FunctionLike => Severity::Low,
                CommandArgKind::Unknown => Severity::High,
            }
        }
    }
}

// ---------------------------------------------------------------------------
// Detector
// ---------------------------------------------------------------------------

/// Detects command-injection sinks (CWE-78).
///
/// Files are scanned either by `scan_file_ast` or by `scan_file_line`;
/// each reported call site is turned into a `Finding` by
/// `build_finding` (or by the dual-branch builder for Python sites
/// when that flag is on).
pub struct CommandInjectionDetector {
    /// Repository root, handed to `detector_relative_path` when filling
    /// in a finding's `affected_files`.
    repository_path: PathBuf,
    /// Cap on findings per scan call: both scanners stop collecting once
    /// they hold this many. Set to 50 by `new`.
    max_findings: usize,
    /// Taint analyzer created in `new`.
    /// NOTE(review): its use is outside this part of the file —
    /// presumably the taint pass mentioned in the module docs; confirm.
    taint_analyzer: TaintAnalyzer,
    /// Write-once slot for taint paths, empty after `new`.
    /// NOTE(review): presumably cross-function paths supplied by the
    /// engine before detection runs — confirm against the setter.
    precomputed_cross: std::sync::OnceLock<Vec<crate::detectors::taint::TaintPath>>,
    /// Write-once slot for taint paths, empty after `new`.
    /// NOTE(review): presumably intra-function paths — confirm against
    /// the setter.
    precomputed_intra: std::sync::OnceLock<Vec<crate::detectors::taint::TaintPath>>,
}

impl CommandInjectionDetector {
    /// Creates a detector for the repository at `repository_path`.
    ///
    /// The detector starts with a findings cap of 50, a fresh
    /// `TaintAnalyzer`, and both precomputed taint-path slots unset.
    pub fn new(repository_path: impl Into<PathBuf>) -> Self {
        use std::sync::OnceLock;

        let repository_path = repository_path.into();
        Self {
            repository_path,
            max_findings: 50,
            taint_analyzer: TaintAnalyzer::new(),
            precomputed_cross: OnceLock::new(),
            precomputed_intra: OnceLock::new(),
        }
    }

    /// Converts `path` into the form stored in a finding's
    /// `affected_files`, by delegating to `detector_relative_path` with
    /// this detector's repository root.
    fn relative_path(&self, path: &Path) -> PathBuf {
        let repo_root = &self.repository_path;
        crate::detectors::detector_relative_path(repo_root, path)
    }

    /// AST scanner. Obtains a syntax tree (the cached one when given,
    /// otherwise a fresh parse), collects every call whose callee is a
    /// known command-exec API in one walk, and turns each unsuppressed
    /// site into a finding.
    ///
    /// Returns no findings when the content contains a NUL byte or when
    /// there is no cached tree and parsing fails. At most
    /// `self.max_findings` findings are produced per call.
    ///
    /// `flag_on` controls the Phase 2d dual-branch emission. When it is
    /// `true` AND the file is Python AND the matched API is a Python
    /// variant, the site is emitted through
    /// [`Self::build_dual_branch_python_finding`]; all other sites go
    /// through [`Self::build_finding`] with the severity from
    /// [`CommandApi::severity_for`].
    fn scan_file_ast(&self, inputs: &ScanAstInputs<'_>, flag_on: bool) -> Vec<Finding> {
        let path = inputs.path();
        let content = inputs.content();
        let ext = inputs.ext();
        let lang = inputs.lang;
        let cached_tree = inputs.cached_tree;

        // Content containing a NUL byte is skipped outright.
        if content.contains('\0') {
            return Vec::new();
        }

        // `parsed_tree` only gets a value on the parse path; it is
        // declared up front so the tree outlives the `root` node that
        // borrows from it.
        let parsed_tree;
        let root = if let Some(tree) = cached_tree {
            tree.root_node()
        } else {
            let Some(tree) = parse_root_ext(content, lang, ext) else {
                return Vec::new();
            };
            parsed_tree = tree;
            parsed_tree.root_node()
        };

        let bytes = content.as_bytes();
        let lines: Vec<&str> = content.lines().collect();

        let go_aliases = if matches!(lang, Language::Go) {
            collect_go_import_aliases(root, bytes)
        } else {
            GoImportAliases::default()
        };

        // Two per-file Python alias maps, both empty for other languages:
        //
        // * from-import aliases resolve `from os import system;
        //   system(cmd)` and similar bare-call shapes that the
        //   attribute-only matcher would otherwise miss. Closes deferred
        //   B10 from `3c88328e`; mirrors `insecure_crypto`'s pattern
        //   (commit `32021903`).
        // * module aliases resolve `import subprocess as sp;
        //   sp.run(cmd, shell=True)` by mapping the receiver text `sp`
        //   back to the canonical module name `subprocess` before
        //   matching — see `python_imports::collect_python_module_aliases`.
        let (py_aliases, py_module_aliases) = if matches!(lang, Language::Python) {
            (
                super::python_imports::collect_python_from_imports(root, bytes),
                super::python_imports::collect_python_module_aliases(root, bytes),
            )
        } else {
            (HashMap::new(), HashMap::new())
        };

        let ctx = AstWalkCtx {
            lang,
            source: bytes,
        };
        let aliases = super::python_imports::PythonAliases::new(&py_aliases, &py_module_aliases);
        let mut sites: Vec<CommandSite> = Vec::new();
        collect_command_sites(&ctx, root, &go_aliases, &aliases, &mut sites);

        // Phase 2d: the dual-branch path needs the per-detector flag AND
        // a Python file; the per-site API check happens in the loop.
        // JS/Go/line-scanner paths stay single-branch per decisions D4.
        let dual_branch_file = flag_on && matches!(lang, Language::Python);

        let mut findings = Vec::new();
        for site in sites {
            if findings.len() >= self.max_findings {
                break;
            }

            // Suppression is evaluated on the line where the call starts,
            // together with the line directly above it (if any).
            let row = site.call_node.start_position().row;
            let source_line = lines.get(row);
            if let Some(line) = source_line {
                let prev = row.checked_sub(1).map(|above| lines[above]);
                if crate::detectors::is_line_suppressed(line, prev) {
                    continue;
                }
            }

            let snippet = source_line.map_or("", |line| line.trim());
            let line_num = (row + 1) as u32;
            let literal = site.target_text.as_deref();

            let finding = if dual_branch_file && site.api.is_python() {
                self.build_dual_branch_python_finding(
                    path,
                    line_num,
                    site.api,
                    site.arg_kind,
                    snippet,
                    site.call_node,
                    bytes,
                    &lines,
                    literal,
                    ext,
                )
            } else {
                let severity = site.api.severity_for(site.arg_kind, literal);
                self.build_finding(
                    path,
                    line_num,
                    site.api,
                    site.arg_kind,
                    severity,
                    snippet,
                    ext,
                    literal,
                )
            };
            findings.push(finding);
        }

        findings
    }

    /// Legacy line scanner for non-AST languages (Ruby, PHP, Java, sh).
    ///
    /// These formats have no AST path here, so each line is matched
    /// against the canonical command-exec forms by `match_line_command`.
    /// A higher false-positive rate is the accepted trade-off; the
    /// canonical forms in these languages are simple enough for
    /// line-based matching to be sufficient in practice.
    ///
    /// Content larger than 500,000 bytes is not scanned, and at most
    /// `self.max_findings` findings are produced per call.
    ///
    /// **Path used by**: `.rb`, `.php`, `.java`, `.sh`. AST-eligible
    /// extensions flow through `scan_file_ast` instead.
    fn scan_file_line(&self, inputs: &ScanInputs<'_>) -> Vec<Finding> {
        let path = inputs.path;
        let content = inputs.content;
        let ext = inputs.ext;

        if content.len() > 500_000 {
            return Vec::new();
        }

        let lines: Vec<&str> = content.lines().collect();
        let mut findings = Vec::new();

        for (idx, line) in lines.iter().enumerate() {
            if findings.len() >= self.max_findings {
                break;
            }

            let prev = idx.checked_sub(1).map(|above| lines[above]);
            if crate::detectors::is_line_suppressed(line, prev) {
                continue;
            }

            // Skip whole-line comments: `#` (Ruby, PHP, sh) and `//`
            // (PHP, Java). The same two prefixes are checked for every
            // extension; block comments are not recognised here.
            let code = line.trim_start();
            let is_comment_line = code.starts_with("//") || code.starts_with('#');
            if is_comment_line {
                continue;
            }

            let Some((api, arg_kind)) = match_line_command(line, ext) else {
                continue;
            };

            // B15 metachar inspection is not plumbed through this path:
            // `match_line_command` yields a kind but not the verbatim
            // literal text, so no literal is passed to `severity_for` or
            // `build_finding`. The AST path covers the same APIs for
            // Python/JS/Go, where the bulk of B15-relevant code lives.
            // Tracked as a follow-up.
            let severity = api.severity_for(arg_kind, None);
            let line_num = (idx + 1) as u32;
            findings.push(self.build_finding(
                path,
                line_num,
                api,
                arg_kind,
                severity,
                line.trim(),
                ext,
                None,
            ));
        }

        findings
    }

    /// Construct a `Finding` for a detected command-exec site.
    ///
    /// * `path` / `line_num` – location of the call; `path` is made
    ///   repository-relative for `affected_files`, and `line_num` is used
    ///   for both `line_start` and `line_end`.
    /// * `api` / `arg_kind` – what was matched and how its argument was
    ///   classified; together they select the title and the
    ///   argument-shape sentence in the description.
    /// * `severity` – stored as given; this function does not compute it.
    /// * `snippet` – source text shown in the description's code fence.
    /// * `ext` – file extension, used for the code-fence language tag and
    ///   passed to `recommend` for the suggested fix.
    /// * `literal_text` – content of a static-literal argument, if known.
    ///   Only consulted for shell-mode APIs with a `StaticLiteral`
    ///   argument (B15).
    fn build_finding(
        &self,
        path: &Path,
        line_num: u32,
        api: CommandApi,
        arg_kind: CommandArgKind,
        severity: Severity,
        snippet: &str,
        ext: &str,
        literal_text: Option<&str>,
    ) -> Finding {
        let api_name = api.callee_label();
        let shell_api = api.is_shell_api();
        // B15: when the argument is a static literal but contains shell
        // metacharacters under a shell-mode API, surface the metachar in
        // the title so reviewers immediately see why the literal is not
        // benign.
        let metachars = literal_text
            .filter(|_| shell_api && arg_kind == CommandArgKind::StaticLiteral)
            .map(|t| shell_metachars_in(t))
            .unwrap_or_default();
        let title = if let Some(first) = metachars.first() {
            format!(
                "Potential command injection via {} (shell metacharacter `{}` in literal)",
                api_name, first,
            )
        } else {
            format!("Potential command injection via {}", api_name)
        };
        let arg_desc = match arg_kind {
            CommandArgKind::StaticLiteral if !metachars.is_empty() => {
                "static string literal containing shell metacharacters (RCE risk)"
            }
            CommandArgKind::StaticLiteral => "static string literal (low risk)",
            CommandArgKind::StaticList => "list of static literals (low risk)",
            CommandArgKind::Interpolated => "string with variable interpolation (RCE risk)",
            CommandArgKind::UserVariable => "user-controlled expression (RCE risk)",
            CommandArgKind::MixedListVarArgv0 => "list whose argv[0] is variable (RCE risk)",
            // Shell-mode APIs rate this shape High in `severity_for`; the
            // "no shell-injection vector with shell=False" wording of the
            // arm below would contradict that severity, so shell-mode
            // sites get their own sentence.
            CommandArgKind::MixedListLiteralArgv0 if shell_api => {
                "list with fixed argv[0] and variable later arguments passed to a \
                 shell-mode API (RCE risk)"
            }
            CommandArgKind::MixedListLiteralArgv0 => {
                "list with fixed argv[0] and variable later arguments \
                 (no shell-injection vector with shell=False; argument injection \
                 into the target binary is still possible — CWE-88)"
            }
            CommandArgKind::FunctionLike => "function value",
            CommandArgKind::Unknown => "non-static argument",
        };
        // Language tag for the Markdown code fence; unknown extensions
        // get an untagged fence.
        let lang_label = match ext {
            "py" => "python",
            "js" | "jsx" => "javascript",
            "ts" | "tsx" => "typescript",
            "rb" => "ruby",
            "php" => "php",
            "go" => "go",
            "java" => "java",
            "sh" => "bash",
            _ => "",
        };
        let description = format!(
            "**Potential Command Injection (CWE-78)**\n\n\
             **API**: `{}`\n\n\
             **Argument shape**: {}\n\n\
             **Location**: {}:{}\n\n\
             **Code snippet**:\n```{}\n{}\n```\n\n\
             OS-command-execution APIs run their argument as a shell or \
             argv list. When that argument is anything other than a \
             constant the program author controls at write time, \
             attackers who can influence the value get arbitrary \
             command execution.",
            api_name,
            arg_desc,
            path.display(),
            line_num,
            lang_label,
            snippet,
        );
        let suggested_fix = self.recommend(api, ext);

        Finding {
            // `id` is left empty here; the remaining fields come from
            // `Finding::default()`.
            id: String::new(),
            detector: "CommandInjectionDetector".to_string(),
            severity,
            title,
            description,
            affected_files: vec![self.relative_path(path)],
            line_start: Some(line_num),
            line_end: Some(line_num),
            suggested_fix: Some(suggested_fix),
            estimated_effort: Some("45 minutes".to_string()),
            category: Some("security".to_string()),
            cwe_id: Some("CWE-78".to_string()),
            why_it_matters: Some(
                "Attackers could execute arbitrary system commands by injecting shell \
                 metacharacters or by choosing the binary that runs."
                    .to_string(),
            ),
            ..Default::default()
        }
    }

    /// Phase 2d: build a dual-branch finding for a Python command-exec
    /// call site.
    ///
    /// Pipeline (matches Phase 2b path_traversal):
    ///
    /// ```text
    /// call_node ─extract_python_evidence─> Evidence ─predict─> Prediction
    ///         └── caller already validated lang + api + suppression + literal
    /// ```
    ///
    /// The resulting `Finding` carries:
    ///
    /// - Primary fields (`severity`, `title`, `description`,
    ///   `suggested_fix`) from the predicted branch.
    /// - `alternative_branch` populated with the opposite branch.
    /// - `prediction_reasons` holding each typed signal that
    ///   contributed to the score.
    /// - `resolution_signals` for collapsing annotations (when present).
    ///
    /// Severity convention from decision **D3**: predicted RealBug uses
    /// the existing 2D severity table `severity_for(api, arg_kind,
    /// literal_text)` (preserves B6/B15 calibration). Predicted Benign
    /// → `Info`. The alternative carries the opposite branch's natural
    /// severity (RealBug-from-table vs Info) so `--show-alternatives`
    /// renders the original interpretation.
    ///
    /// Important: dual-branch findings opt OUT of Pass B taint
    /// enrichment AND Pass C handler-boost — they're already classified
    /// by the predictor, and an unconditional `Critical` bump would
    /// defeat that. The skip is keyed on `Finding::is_dual_branch()`.
    #[allow(clippy::too_many_arguments)]
    fn build_dual_branch_python_finding(
        &self,
        path: &Path,
        line_num: u32,
        api: CommandApi,
        arg_kind: CommandArgKind,
        snippet: &str,
        call_node: tree_sitter::Node<'_>,
        source: &[u8],
        lines: &[&str],
        literal_text: Option<&str>,
        ext: &str,
    ) -> Finding {
        let api_label = api.callee_label();

        let evidence = evidence::extract_python_evidence(call_node, source, lines);
        let prediction = predict::predict(&evidence, api, arg_kind, literal_text);

        let predicted_label = prediction.predicted;
        let predicted_severity = prediction.predicted_severity;
        let predicted_title = match predicted_label {
            crate::dual_branch::BranchLabel::RealBug => {
                format!("Potential command injection via {api_label}")
            }
            crate::dual_branch::BranchLabel::Benign => {
                format!("Internal command exec via {api_label} (informational)")
            }
        };
        let lang_label = match ext {
            "py" => "python",
            _ => "",
        };
        let predicted_description = format!(
            "**Command injection (dual-branch, CWE-78)**\n\n\
             **API**: `{}`\n\n\
             **Location**: {}:{}\n\n\
             **Code**:\n```{}\n{}\n```\n\n\
             {}",
            api_label,
            path.display(),
            line_num,
            lang_label,
            snippet,
            match predicted_label {
                crate::dual_branch::BranchLabel::RealBug => format!(
                    "The argument to `{api_label}` appears to be \
                     attacker-influenceable (request source, parameter \
                     not classified as config/literal, or `shell=True` \
                     with non-literal command). The predictor leans \
                     RealBug for this call site (see `prediction_reasons`)."
                ),
                crate::dual_branch::BranchLabel::Benign => format!(
                    "The argument to `{api_label}` appears to be \
                     internal — fixed argv[0], all-literal list, or \
                     config-derived. The predictor leans Benign (see \
                     `prediction_reasons`); the original \
                     `severity_for`-table interpretation is carried in \
                     `alternative_branch`."
                ),
            },
        );
        let predicted_fix = match predicted_label {
            crate::dual_branch::BranchLabel::RealBug => Some(format!(
                "{}\n\nIf this is a false positive (the command is \
                 internal/config-derived and not attacker-reachable), \
                 annotate the call site with `# repotoire: \
                 command-static[<reason>]` to collapse the finding to \
                 Info.",
                self.recommend(api, ext)
            )),
            crate::dual_branch::BranchLabel::Benign => Some(
                "If this is intentional internal use, annotate \
                 `# repotoire: command-static[<reason>]` to collapse \
                 the finding to Info definitively. If this IS \
                 attacker-reachable (the alternative branch), follow \
                 the standard remediation: pass arguments as a list \
                 with `shell=False`, and validate any user-controlled \
                 component against an allowlist."
                    .to_string(),
            ),
        };

        let mut finding = Finding {
            id: String::new(),
            detector: "CommandInjectionDetector".to_string(),
            severity: predicted_severity,
            title: predicted_title,
            description: predicted_description,
            affected_files: vec![self.relative_path(path)],
            line_start: Some(line_num),
            line_end: Some(line_num),
            suggested_fix: predicted_fix,
            estimated_effort: Some("45 minutes".to_string()),
            category: Some("security".to_string()),
            cwe_id: Some("CWE-78".to_string()),
            why_it_matters: Some(
                "Command injection lets attackers run arbitrary OS \
                 commands — but not every exec call site is \
                 attacker-reachable. The predictor's job is to \
                 distinguish, and to keep the alternative interpretation \
                 visible via --show-alternatives."
                    .to_string(),
            ),
            ..Default::default()
        };

        finding = finding.with_alternative_branch(prediction.alternative_branch);
        for reason in prediction.reasons {
            finding = finding.with_prediction_reason(reason);
        }
        for resolution in prediction.resolutions {
            finding = finding.with_resolution_signal(resolution);
        }

        finding
    }

    /// Remediation text for a finding, chosen by API first and by file
    /// extension second.
    ///
    /// Python and Go APIs (and the JS `spawn("sh", ["-c", ...])` shape)
    /// select their advice from the API alone. The remaining JS APIs
    /// only select theirs when `ext` is one of `js`/`ts`/`jsx`/`tsx`.
    /// Anything that falls through gets language-level advice for
    /// `rb`, `php` and `java`, or a generic one-liner otherwise.
    fn recommend(&self, api: CommandApi, ext: &str) -> String {
        let js_like = matches!(ext, "js" | "ts" | "jsx" | "tsx");

        let advice: &'static str = match api {
            CommandApi::PyOsSystem | CommandApi::PyOsPopen => {
                "Avoid `os.system` / `os.popen` — they spawn a shell and pass the \
                 string through it.\n\n\
                 - Prefer `subprocess.run([\"cmd\", \"arg\"], shell=False)` with a \
                 fixed argv[0] and untrusted input only as later argv elements.\n\
                 - Validate any user-controlled argv[0] against an allowlist."
            }
            CommandApi::PySubprocessShell
            | CommandApi::PySubprocessGetOutput
            | CommandApi::PyCommandsGetOutput => {
                "Avoid `subprocess` with `shell=True` (or `getoutput`, which is \
                 implicit shell=True). Replace with the list form: \
                 `subprocess.run([\"cmd\", \"arg\"], shell=False)`."
            }
            CommandApi::PySubprocessNoShell => {
                "Use a fixed-string argv[0]. Validate any user-controlled argv[0] \
                 against an allowlist of allowed binaries — even without shell=True \
                 the attacker can otherwise choose which program runs."
            }
            CommandApi::PySubprocessShellC => {
                "`subprocess.run([\"sh\", \"-c\", user_input])` is a textbook \
                 shell-injection sink — the shell still interprets user_input as \
                 a command line. Drop the shell entirely: use \
                 `subprocess.run([\"binary\", \"arg1\", user_input], shell=False)` \
                 with a fixed binary and pass user data as later argv elements."
            }
            CommandApi::PyPtySpawn => {
                "Avoid `pty.spawn` on user-controlled command strings. Use a fixed \
                 binary and pass user data as later argv elements."
            }
            CommandApi::JsChildProcessExec | CommandApi::JsShellJsExec if js_like => {
                "Avoid `child_process.exec` (it always spawns a shell). Use \
                 `child_process.execFile` or `spawn` with `[binary, args]` and \
                 `shell: false`. Never interpolate user input into a command \
                 string."
            }
            CommandApi::JsChildProcessExecFile
            | CommandApi::JsChildProcessSpawn
            | CommandApi::JsChildProcessFork
                if js_like =>
            {
                "Use a fixed binary path for argv[0]. Validate any \
                 user-controlled argv[0] against an allowlist — even without \
                 `shell: true` the attacker can otherwise choose which program \
                 runs."
            }
            CommandApi::JsSpawnShellC => {
                "`child_process.spawn(\"sh\", [\"-c\", userInput])` is a \
                 textbook shell-injection sink — the shell still interprets \
                 userInput as a command line. Drop the shell entirely: use \
                 `child_process.spawn(\"binary\", [\"arg1\", userInput], { shell: false })` \
                 with a fixed binary and pass user data as later argv elements."
            }
            CommandApi::GoExecCommandShellC => {
                "`exec.Command(\"sh\", \"-c\", userInput)` is a textbook \
                 shell-injection sink. Use `exec.Command(binary, arg1, arg2)` with \
                 a fixed binary and pass user input as later arguments. Never \
                 build a shell command string from user data."
            }
            CommandApi::GoExecCommand | CommandApi::GoSyscallExec => {
                "Use a fixed binary path for the first argument. Validate any \
                 user-controlled argv[0] against an allowlist of allowed \
                 binaries. Use `filepath.Clean` for paths."
            }
            // No API-specific advice applied: fall back to the language
            // implied by the file extension.
            _ => match ext {
                "rb" => {
                    "Avoid `system` / `exec` / backtick-strings on user input. \
                     Use `Open3.capture2(['cmd', arg])` with an array form."
                }
                "php" => {
                    "Avoid `system` / `shell_exec` / `passthru` / `proc_open` / `exec` \
                     on user input. If you must shell out, validate with \
                     `escapeshellarg`/`escapeshellcmd` and use a fixed command."
                }
                "java" => {
                    "Use `ProcessBuilder` with an explicit argv list and a \
                     fixed binary; never concatenate user input into the command \
                     string."
                }
                _ => "Avoid passing user-controlled data to OS-command APIs.",
            },
        };

        advice.to_string()
    }
}

impl Detector for CommandInjectionDetector {
    /// Registry name of this detector. The same string is used in
    /// `detect` to look up the dual-branch feature flag.
    fn name(&self) -> &'static str {
        "command-injection"
    }

    /// One-line human-readable summary of the detector.
    fn description(&self) -> &'static str {
        "Detects command injection vulnerabilities (AST-first; CWE-78)"
    }

    /// Always `true`.
    ///
    /// NOTE(review): presumably opts this detector out of the shared
    /// post-processing stage — confirm against the `Detector` trait docs.
    fn bypass_postprocessor(&self) -> bool {
        true
    }

    crate::detectors::impl_taint_precompute!();

    /// Taint category whose paths feed the severity refinement in
    /// `detect`.
    fn taint_category(&self) -> Option<crate::detectors::taint::TaintCategory> {
        Some(TaintCategory::CommandInjection)
    }

    /// File extensions this detector wants to see.
    fn file_extensions(&self) -> &'static [&'static str] {
        SUPPORTED_EXTS
    }

    /// Content flag advertised to the framework (`HAS_EXEC`).
    fn content_requirements(&self) -> crate::detectors::detector_context::ContentFlags {
        crate::detectors::detector_context::ContentFlags::HAS_EXEC
    }

    /// Scan every supported file and return the command-injection
    /// findings.
    ///
    /// Steps, in order:
    ///
    /// 1. Gather taint paths (precomputed if available, otherwise
    ///    traced here) for `TaintCategory::CommandInjection`.
    /// 2. Per file: skip oversized files and files with no
    ///    command-exec keyword, then run the AST scan (extensions in
    ///    `AST_EXTS`) or the line scan (everything else).
    /// 3. Pass B — taint refinement: a finding whose location
    ///    coincides with an endpoint of a taint path becomes `Low`
    ///    (sanitized path) or `Critical` (unsanitized path).
    /// 4. Pass C — handler boost: `High`/`Medium` findings inside a
    ///    function whose name looks like a request handler become
    ///    `Critical`.
    /// 5. Unless the dual-branch flag is on, drop `Low` findings.
    ///
    /// Dual-branch findings skip passes B and C.
    fn detect(
        &self,
        ctx: &crate::detectors::analysis_context::AnalysisContext,
    ) -> Result<Vec<Finding>> {
        let graph = ctx.graph;
        let files = &ctx.as_file_provider();
        let mut findings: Vec<Finding> = vec![];

        // Phase 2d: capture the dual-branch flag once per detect() call,
        // mirroring path_traversal's pattern. Threaded into scan_file_ast
        // and gates Pass B/Pass C/drop-Low skips below.
        let flag_on = ctx.dual_branch.is_enabled_for("command-injection");

        // Run taint analysis for command injection (precomputed or fallback).
        let mut taint_paths = if let Some(cross) = self.precomputed_cross.get() {
            cross.clone()
        } else {
            self.taint_analyzer
                .trace_taint(graph, TaintCategory::CommandInjection)
        };
        let intra_paths = if let Some(intra) = self.precomputed_intra.get() {
            intra.clone()
        } else {
            crate::detectors::taint::run_intra_function_taint(
                &self.taint_analyzer,
                graph,
                TaintCategory::CommandInjection,
                &self.repository_path,
            )
        };
        taint_paths.extend(intra_paths);
        let taint_result = TaintAnalysisResult::from_paths(taint_paths);

        for path in files.files_with_extensions(SUPPORTED_EXTS) {
            if findings.len() >= self.max_findings {
                break;
            }

            let raw = match files.content(path) {
                Some(c) => c,
                None => continue,
            };
            let raw_str: &str = &raw;

            // Size cap first: it is an O(1) check, so oversized files
            // are rejected before we pay for the substring scan below.
            if raw.len() > 500_000 {
                continue;
            }

            // Cheap pre-filter: skip files without any command-exec keyword.
            // Substrings cover every callee name we match, plus the
            // `shell=` keyword so we don't miss `subprocess.run(x, shell=True)`
            // in a file that imports subprocess by alias. A backtick
            // anywhere in the file also keeps it in play.
            if !contains_any(COMMAND_KEYWORD_FINDERS, raw_str) && !raw_str.contains('`') {
                continue;
            }

            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            let has_ast_grammar = AST_EXTS.contains(&ext);

            let new_findings = if has_ast_grammar {
                let cached = files.tree(path);
                let lang = Language::from_path(path);
                let scan = ScanInputs::new(path, raw_str, ext);
                let ast_inputs = ScanAstInputs::new(scan, lang, cached.as_deref());
                self.scan_file_ast(&ast_inputs, flag_on)
            } else {
                let scan = ScanInputs::new(path, raw_str, ext);
                self.scan_file_line(&scan)
            };
            findings.extend(new_findings);
        }

        // Severity refinement via taint analysis (Pass B).
        for finding in &mut findings {
            // Phase 2d: dual-branch findings opt out of taint mutation.
            // The predictor already chose a branch and a severity for
            // them; the unconditional `Critical`/`Low` overwrite below
            // would clobber that. See decisions doc § "Interaction with
            // the existing taint pass (Pass B) and handler boost (Pass
            // C)" and the mirror invariant in `path_traversal/mod.rs`.
            if finding.is_dual_branch() {
                continue;
            }
            let file_path = finding
                .affected_files
                .first()
                .map(|p| p.to_string_lossy().to_string())
                .unwrap_or_default();
            let line = finding.line_start.unwrap_or(0);
            for taint in &taint_result.paths {
                // A taint path is relevant only when ONE of its endpoints
                // sits exactly at the finding: file and line must come
                // from the same endpoint. Mixing them (sink file with
                // source line, or the reverse) would let a cross-file
                // path "confirm" an unrelated finding that merely shares
                // a line number.
                let sink_here = taint.sink_file == file_path && taint.sink_line == line;
                let source_here = taint.source_file == file_path && taint.source_line == line;
                if !(sink_here || source_here) {
                    continue;
                }

                if taint.is_sanitized {
                    finding.severity = Severity::Low;
                    finding.description.push_str(&format!(
                        "\n\n**Taint Analysis Note**: A sanitizer function (`{}`) \
                         was found in the data flow path, which may mitigate this \
                         vulnerability.",
                        taint.sanitizer.as_deref().unwrap_or("unknown")
                    ));
                } else {
                    finding.severity = Severity::Critical;
                    finding.description.push_str(&format!(
                        "\n\n**Taint Analysis Confirmed**: Data flow analysis \
                         traced a path from user input to this command-execution \
                         sink without sanitization:\n\n`{}`",
                        taint.path_string()
                    ));
                }
                // Only the first matching path is applied.
                break;
            }
        }

        // Severity boost when the call appears in a request-handler
        // function. Mirrors `eval_detector` B8 (camelCase verb-prefix
        // handler matching).
        static HANDLER_VERB_RE: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new(r"^(get|post|put|delete|patch|head|options)[A-Z]").expect("valid regex")
        });
        for finding in &mut findings {
            // Phase 2d: dual-branch findings opt out of the handler
            // boost (Pass C). The handler-scope signal is already
            // encoded as `W_ENCLOSING_HANDLER = -0.30` in the predictor;
            // double-counting via a `High`/`Medium` → `Critical` bump
            // here would erase the carefully-calibrated 2D severity
            // table from decisions D3.
            if finding.is_dual_branch() {
                continue;
            }
            if !matches!(finding.severity, Severity::High | Severity::Medium) {
                continue;
            }
            if let (Some(file_path), Some(line)) =
                (finding.affected_files.first(), finding.line_start)
            {
                let path_str = file_path.to_string_lossy().to_string();
                let i = graph.interner();
                if let Some(func) = graph.find_function_at(&path_str, line) {
                    let raw_name = func.node_name(i);
                    let name_lower = raw_name.to_lowercase();
                    // Substring heuristics run on the lowercased name;
                    // the verb-prefix regex needs the original casing
                    // because it keys on the capital after the verb.
                    let is_route = name_lower.contains("handler")
                        || name_lower.contains("route")
                        || name_lower.contains("endpoint")
                        || name_lower.contains("view")
                        || name_lower.contains("controller")
                        || name_lower.contains("middleware")
                        || name_lower.contains("request")
                        || name_lower.contains("response")
                        || HANDLER_VERB_RE.is_match(raw_name);
                    if is_route {
                        finding.severity = Severity::Critical;
                    }
                }
            }
        }

        // Drop Low findings (static literals, sanitized paths). Caller-
        // controlled constants are not actionable without context the
        // detector doesn't have.
        //
        // Phase 2d: when the dual-branch flag is on we disable this
        // drop entirely, even for single-branch findings. Rationale:
        // under flag-on the predictor is the authoritative classifier;
        // a predicted-Benign-Info finding deserves to surface for
        // visibility and audit, and a predicted-RealBug-Low finding
        // (e.g. the canonical `shell=True` with a static literal) is
        // exactly the case where the predictor has more to say than the
        // raw severity table. See decisions D-Drop-Low sharpening.
        if !flag_on {
            findings.retain(|f| f.severity != Severity::Low);
        }

        Ok(findings)
    }
}

impl crate::detectors::RegisteredDetector for CommandInjectionDetector {
    /// Build the detector for the repository named in `init` and hand
    /// it back as a shared trait object.
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let detector = Self::new(init.repo_path);
        std::sync::Arc::new(detector)
    }
}

// ---------------------------------------------------------------------------
// Pre-filter
// ---------------------------------------------------------------------------

/// Substring finders used as the cheap per-file pre-filter in
/// `Detector::detect`: a file is scanned only if at least one of these
/// finders matches its content (or it contains a backtick, see below).
/// This avoids the cost of a full parse on files that can't possibly
/// have a command-injection site.
///
/// Each callee name we match in `match_*_call` MUST be covered by a
/// substring here, otherwise the AST scan will never run on a file
/// containing only that callee.
///
/// Note: backticks (`` ` ``, the Ruby command-string syntax) are not in
/// this table. The call site checks for them separately with
/// ``raw_str.contains('`')`` because backtick is a single byte and
/// building a `Finder` for it would be wasteful.
static COMMAND_KEYWORD_FINDERS: &[&LazyLock<memchr::memmem::Finder<'static>>] = &[
    &FIND_OS_SYSTEM,
    &FIND_OS_POPEN,
    &FIND_SUBPROCESS,
    &FIND_CHILD_PROCESS,
    &FIND_EXEC_SYNC,
    &FIND_EXEC_ASYNC,
    &FIND_SPAWN_SYNC,
    &FIND_SHELL_EXEC,
    &FIND_PROC_OPEN,
    &FIND_EXEC_COMMAND,
    &FIND_RUNTIME_GETRUNTIME,
    &FIND_PROCESS_BUILDER,
    &FIND_SHELL_TRUE,
    &FIND_SHELL_TRUE_JS,
    &FIND_EXEC_PAREN,
    &FIND_GETOUTPUT_PAREN,
    &FIND_SYSTEM_PAREN,
    &FIND_PASSTHRU_PAREN,
    &FIND_POPEN_PAREN,
    &FIND_SYSCALL_DOT,
    &FIND_SHELLJS,
    &FIND_PTY_SPAWN,
    &FIND_COMMANDS_GETOUTPUT,
    &FIND_COMMANDS_GETSTATUSOUTPUT,
    &FIND_OS_EXEC_IMPORT,
];

// ---------------------------------------------------------------------------
// AST walking
// ---------------------------------------------------------------------------

/// One command-injection-shaped call site we want to emit.
///
/// Produced by the per-language `match_*_call` functions and collected
/// by `collect_command_sites`.
struct CommandSite<'a> {
    /// The matched call node (`call` in Python, `call_expression` in
    /// JS/TS and Go) — used for line lookup.
    call_node: tree_sitter::Node<'a>,
    /// Which API was matched.
    api: CommandApi,
    /// Classified shape of the relevant argument.
    arg_kind: CommandArgKind,
    /// Verbatim source text of the classified argument, when it was a
    /// `StaticLiteral`. Used by the severity classifier to inspect the
    /// literal for shell metacharacters (audit B15). `None` for non-
    /// literal shapes (Interpolated, UserVariable, ...) since those
    /// already classify above Low.
    target_text: Option<String>,
}

/// Walk the subtree rooted at `node` and append a `CommandSite` to
/// `out` for every dangerous-API call found.
///
/// Nodes are visited in pre-order (a node before its children,
/// children left to right), so sites are appended in source order.
///
/// The walk uses an explicit stack rather than recursion: nesting
/// depth is controlled by the scanned source, and a pathologically
/// nested file (e.g. thousands of nested parentheses or chained binary
/// operators) must not be able to overflow the thread's call stack.
fn collect_command_sites<'a>(
    ctx: &AstWalkCtx<'a>,
    node: tree_sitter::Node<'a>,
    go_aliases: &GoImportAliases,
    py_aliases: &super::python_imports::PythonAliases<'_>,
    out: &mut Vec<CommandSite<'a>>,
) {
    let mut pending = vec![node];
    while let Some(current) = pending.pop() {
        if let Some(site) =
            match_command_site(current, ctx.source, ctx.lang, go_aliases, py_aliases)
        {
            out.push(site);
        }

        // Append the children, then reverse just that tail so the
        // first child ends up on top of the stack and is visited next.
        let mut cursor = current.walk();
        let first_child_slot = pending.len();
        pending.extend(current.children(&mut cursor));
        pending[first_child_slot..].reverse();
    }
}

/// Dispatch `node` to the matcher for `lang`.
///
/// Only call-shaped nodes are considered: `call` for Python and
/// `call_expression` for JavaScript/TypeScript and Go. Every other
/// node kind, and every other language, yields `None`.
fn match_command_site<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
    lang: Language,
    go_aliases: &GoImportAliases,
    py_aliases: &super::python_imports::PythonAliases<'_>,
) -> Option<CommandSite<'a>> {
    let kind = node.kind();
    match lang {
        Language::Python if kind == "call" => match_python_call(node, source, py_aliases),
        Language::JavaScript | Language::TypeScript if kind == "call_expression" => {
            match_js_call(node, source)
        }
        Language::Go if kind == "call_expression" => match_go_call(node, source, go_aliases),
        _ => None,
    }
}

// ---------------------------------------------------------------------------
// Shell-c classification (shared across Python / JS / Go)
// ---------------------------------------------------------------------------

/// Is `name` (the stripped contents of a string literal) a known shell
/// binary name? Recognised across `subprocess.run(["sh", ...])`,
/// `child_process.spawn("sh", ...)`, and `exec.Command("sh", ...)`.
///
/// The set is the union of common POSIX shells and Windows command
/// interpreters. Matching is done on the final path component, so any
/// directory prefix is accepted (`/bin/sh`, `/usr/local/bin/zsh`,
/// `/opt/homebrew/bin/bash`, `C:\Windows\System32\cmd.exe`, ...) —
/// where a shell is installed varies between systems, and a fixed
/// list of absolute paths inevitably misses some.
///
/// `env` is deliberately NOT treated as a shell by basename; only the
/// exact string `/usr/bin/env` is accepted.
///
/// Matching is case-sensitive. Empty strings and paths ending in a
/// separator never match.
fn is_shell_binary_name(name: &str) -> bool {
    if name == "/usr/bin/env" {
        return true;
    }

    // Final path component; both separator styles are honoured so
    // Windows-style literals resolve to `cmd.exe` / `powershell.exe`.
    let base = name
        .rsplit(|c: char| c == '/' || c == '\\')
        .next()
        .unwrap_or(name);

    matches!(
        base,
        "sh" | "bash"
            | "dash"
            | "zsh"
            | "ksh"
            | "ash"
            | "csh"
            | "tcsh"
            | "fish"
            | "cmd"
            | "cmd.exe"
            | "powershell"
            | "powershell.exe"
            | "pwsh"
            | "pwsh.exe"
    )
}

/// Is `flag` a recognised "run the next argument as a command line"
/// flag?
///
/// Accepted spellings:
///
/// - POSIX shells: `-c` (exact; `-C` is a different option there).
/// - `cmd.exe`: `/c` and `/k`, either case. Both execute the following
///   string; `/k` merely keeps the interpreter open afterwards.
/// - PowerShell: `-Command` in any letter case, since PowerShell
///   parameter names are case-insensitive.
fn is_shell_c_flag(flag: &str) -> bool {
    matches!(flag, "-c" | "/c" | "/C" | "/k" | "/K") || flag.eq_ignore_ascii_case("-Command")
}

// ---------------------------------------------------------------------------
// Python
// ---------------------------------------------------------------------------

/// Look up a `(module, name)` pair in the Python command-exec API
/// table.
///
/// Returns `Some((api, index))` when the pair names a known
/// command-execution function, where `index` is the position of the
/// argument that carries the command (currently always `0`), and
/// `None` for anything else.
///
/// For the `subprocess` runners (`run`, `call`, `Popen`,
/// `check_output`, `check_call`) the call's arguments are inspected
/// for `shell=True` to choose between the shell and no-shell API
/// variants; `arg_nodes` and `source` are not consulted for any other
/// entry.
///
/// `match_python_call` uses this for both the attribute form
/// (`os.system(...)`) and the bare-identifier form
/// (`from os import system; system(...)`), so a new table entry covers
/// both shapes.
fn classify_python_command_callee(
    module: &str,
    name: &str,
    arg_nodes: &[tree_sitter::Node<'_>],
    source: &[u8],
) -> Option<(CommandApi, usize)> {
    // Every API in the table takes its command as the first argument.
    const COMMAND_ARG_INDEX: usize = 0;

    let api = match (module, name) {
        ("os", "system") => CommandApi::PyOsSystem,
        ("os", "popen" | "popen2" | "popen3" | "popen4") => CommandApi::PyOsPopen,
        ("subprocess", "run" | "call" | "Popen" | "check_output" | "check_call") => {
            if python_subprocess_shell_true(arg_nodes, source) {
                CommandApi::PySubprocessShell
            } else {
                CommandApi::PySubprocessNoShell
            }
        }
        ("subprocess", "getoutput" | "getstatusoutput") => CommandApi::PySubprocessGetOutput,
        ("commands", "getoutput" | "getstatusoutput") => CommandApi::PyCommandsGetOutput,
        ("pty", "spawn") => CommandApi::PyPtySpawn,
        _ => return None,
    };

    Some((api, COMMAND_ARG_INDEX))
}

/// Match a Python `call` node against the command-exec API list.
///
/// Returns a `CommandSite` when the callee resolves to a known
/// command-execution API and a command argument can be located;
/// `None` otherwise (unknown callee, unsupported callee shape, or no
/// argument to classify).
///
/// The command argument is located as follows:
///
/// 1. the argument at the index reported by
///    `classify_python_command_callee`, if it is not a keyword
///    argument;
/// 2. otherwise the first non-keyword argument, if there is one;
/// 3. otherwise the value of the keyword argument that names the
///    API's command parameter (`args=` for the `subprocess` runners,
///    `command=` for `os.system`, `argv=` for `pty.spawn`, `cmd=` for
///    the rest). This covers all-keyword calls such as
///    `subprocess.run(args=cmd, shell=True)`.
fn match_python_call<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
    aliases: &super::python_imports::PythonAliases<'_>,
) -> Option<CommandSite<'a>> {
    let func = node.child_by_field_name("function")?;
    let func = unwrap_callee(func);
    let args = node.child_by_field_name("arguments")?;
    let arg_nodes = collect_named_args(args);

    // Recognize callee shapes:
    //   `os.system`         → attribute object=identifier "os" attribute="system"
    //   `os.popen`          → ditto
    //   `subprocess.run`    → ditto
    //   `pty.spawn`         → ditto
    //   `commands.getoutput`→ ditto
    //   bare `system(cmd)`  → identifier; resolved via from-import alias
    //                         map to the originating module (closes B10).
    //   `sp.run(cmd)` after `import subprocess as sp` → attribute whose
    //                         object resolves through `aliases.modules`
    //                         to the canonical module name.
    let (api, classified_arg_index) = match func.kind() {
        "attribute" => {
            let obj = func.child_by_field_name("object")?;
            let attr = func.child_by_field_name("attribute")?;
            let attr_text = node_text(attr, source)?;
            let raw_label = receiver_chain_label(obj, source);
            // Resolve module aliases (`import subprocess as sp`) by
            // looking up the case-sensitive identifier text first,
            // then falling back to the receiver-chain label.
            let obj_text = node_text(obj, source).unwrap_or("");
            let obj_label = aliases
                .modules
                .get(obj_text)
                .or_else(|| aliases.modules.get(raw_label.as_str()))
                .cloned()
                .unwrap_or(raw_label);
            classify_python_command_callee(obj_label.as_str(), attr_text, &arg_nodes, source)?
        }
        "identifier" => {
            // Bare-call: only fires if a `from <module> import <name>`
            // bound this name to one of the known command-exec modules.
            let name = node_text(func, source)?;
            let module = aliases.imports.get(name)?;
            classify_python_command_callee(module.as_str(), name, &arg_nodes, source)?
        }
        _ => return None,
    };

    let first = arg_nodes.get(classified_arg_index).copied()?;
    let target = if first.kind() == "keyword_argument" {
        // The slot we wanted holds `name=value`. Prefer any
        // non-keyword argument if one exists.
        let positional = arg_nodes
            .iter()
            .copied()
            .find(|a| a.kind() != "keyword_argument");
        match positional {
            Some(arg) => arg,
            None => {
                // All-keyword call. Python does not allow a positional
                // argument after a keyword one, so the command can only
                // have been passed by name — e.g.
                // `subprocess.run(shell=True, args=cmd)`. Classify the
                // value of that keyword instead of dropping the site.
                let command_kwarg = match api {
                    CommandApi::PySubprocessShell | CommandApi::PySubprocessNoShell => "args",
                    CommandApi::PyOsSystem => "command",
                    CommandApi::PyPtySpawn => "argv",
                    CommandApi::PyOsPopen
                    | CommandApi::PySubprocessGetOutput
                    | CommandApi::PyCommandsGetOutput => "cmd",
                    _ => return None,
                };
                arg_nodes.iter().copied().find_map(|kwarg| {
                    let key = kwarg.child_by_field_name("name")?;
                    if node_text(key, source)? == command_kwarg {
                        kwarg.child_by_field_name("value")
                    } else {
                        None
                    }
                })?
            }
        }
    } else {
        first
    };

    // Detect the canonical shell-c form
    // `subprocess.run(["sh", "-c", USER_VAR], ...)`. With `shell=False`
    // but argv[0] a shell binary and argv[1] `-c`, the shell still
    // interprets argv[2] as a command line — a shell-injection sink.
    // Reclassify the API to `PySubprocessShellC` (always Critical).
    // Mirrors the Go logic in `match_go_call`.
    if api == CommandApi::PySubprocessNoShell {
        if let Some(kind) = python_detect_shell_c(target, source) {
            return Some(CommandSite {
                call_node: node,
                api: CommandApi::PySubprocessShellC,
                arg_kind: kind,
                // `PySubprocessShellC` is always Critical regardless of
                // metachars; no need to capture the literal here.
                target_text: None,
            });
        }
    }

    let arg_kind = classify_command_arg_python(target, source);
    // For B15 metachar inspection: capture verbatim text of static
    // literals so the severity classifier can spot shell metachars.
    // Surrounding quotes are harmless to the check (none of our
    // metachars overlap with `"` / `'`).
    let target_text = if matches!(arg_kind, CommandArgKind::StaticLiteral) {
        node_text(target, source).map(|s| s.to_string())
    } else {
        None
    };

    Some(CommandSite {
        call_node: node,
        api,
        arg_kind,
        target_text,
    })
}

/// Does this `subprocess.*` call pass a truthy `shell=` keyword?
///
/// Delegates to `python_kwarg_truthy` for the `shell` keyword. A value
/// that cannot be read as a literal (e.g. `shell=some_var`) is counted
/// as `True`, which matches caller intent and keeps the severity
/// tables for `PySubprocessShell` and `PySubprocessNoShell` aligned
/// for the `Unknown` argv[0] kind.
fn python_subprocess_shell_true(args: &[tree_sitter::Node<'_>], source: &[u8]) -> bool {
    // Answer to use when `shell=` is present but not a literal.
    const UNKNOWN_SHELL_VALUE: bool = true;
    python_kwarg_truthy(args, "shell", source, UNKNOWN_SHELL_VALUE)
}

/// Return the text inside a Python `string` node, provided the string
/// contains no interpolation.
///
/// - Non-`string` nodes yield `None`.
/// - A string with an `interpolation` child (an f-string
///   substitution) yields `None`: its value is not a static literal.
/// - Otherwise the text of all `string_content` children is
///   concatenated and returned; other child kinds are ignored.
/// - If the node has no `string_content` child at all (as with the
///   empty literals `""` / `''`), the node's full source text is used
///   with one opening quote (optionally `r`-prefixed) and one closing
///   quote removed. `None` if the text does not have that shape.
///
/// Used by the Python shell-c reclassification to inspect argv[0] for
/// known shell-binary names.
fn python_string_literal_value(node: tree_sitter::Node<'_>, source: &[u8]) -> Option<String> {
    if node.kind() != "string" {
        return None;
    }

    // `collected` stays `None` until the first `string_content` child
    // with readable text is seen.
    let mut collected: Option<String> = None;
    let mut cursor = node.walk();
    for part in node.children(&mut cursor) {
        let kind = part.kind();
        if kind == "interpolation" {
            return None;
        }
        if kind == "string_content" {
            if let Some(text) = node_text(part, source) {
                collected.get_or_insert_with(String::new).push_str(text);
            }
        }
    }
    if collected.is_some() {
        return collected;
    }

    // No content children: peel the quotes off the raw source text.
    // Prefixes are tried in this order so that `r"` wins over `"`.
    let raw = node_text(node, source)?;
    let after_open = ["r\"", "r'", "\"", "'"]
        .iter()
        .find_map(|opener| raw.strip_prefix(*opener))?;
    let body = ["\"", "'"]
        .iter()
        .find_map(|closer| after_open.strip_suffix(*closer))?;
    Some(body.to_string())
}

/// Detect the `subprocess.run(["sh", "-c", USER_VAR])` shape.
///
/// `target` is the first non-keyword argument of the call. When it is
/// a list/tuple of the form `["<shell-binary>", "-c", <variable>, ...]`
/// this returns `Some(MixedListVarArgv0)`, and the caller reclassifies
/// the API to `PySubprocessShellC` (always Critical, whatever else the
/// list holds).
///
/// Returns `None` when any structural condition fails: `target` is not
/// a list/tuple, it has fewer than three elements, argv[0] is not a
/// literal shell name, argv[1] is not a literal `-c`-style flag, or
/// every element from argv[2] on is static. The gate is deliberately
/// tight so a non-shell-c shape is never reclassified.
fn python_detect_shell_c(target: tree_sitter::Node<'_>, source: &[u8]) -> Option<CommandArgKind> {
    let target_kind = target.kind();
    if target_kind != "list" && target_kind != "tuple" {
        return None;
    }

    let mut walker = target.walk();
    let items: Vec<tree_sitter::Node<'_>> = target
        .children(&mut walker)
        .filter(|item| item.is_named())
        .collect();

    // Need argv[0], argv[1] and at least one trailing element.
    let (argv0, flag_node, payload) = match items.as_slice() {
        [argv0, flag_node, payload @ ..] if !payload.is_empty() => (*argv0, *flag_node, payload),
        _ => return None,
    };

    let shell_name = python_string_literal_value(argv0, source)?;
    if !is_shell_binary_name(shell_name.as_str()) {
        return None;
    }
    let flag = python_string_literal_value(flag_node, source)?;
    if !is_shell_c_flag(flag.as_str()) {
        return None;
    }

    // If everything after the flag is static there is no injection
    // vector; leave the list to the regular pipeline (StaticList → Low).
    let fully_static = payload.iter().all(|item| {
        matches!(
            classify_command_arg_python(*item, source),
            CommandArgKind::StaticLiteral | CommandArgKind::FunctionLike
        )
    });
    if fully_static {
        None
    } else {
        Some(CommandArgKind::MixedListVarArgv0)
    }
}

/// Classify the shape of a Python command-call argument.
///
/// Mapping from tree-sitter node kind to `CommandArgKind`:
///
/// - `string`: `Interpolated` if it has an `interpolation` child
///   (f-string substitution), otherwise `StaticLiteral`.
/// - `concatenated_string`: `Interpolated` if any part is, otherwise
///   `StaticLiteral`.
/// - `binary_operator` (`+`, `%`, ...): `Interpolated` if any operand
///   is dynamic, otherwise `StaticLiteral`. A list/tuple operand that
///   contains non-literal elements counts as dynamic, so
///   `"rm %s" % (path,)` and `["sh", "-c"] + [cmd]` are not mistaken
///   for static strings.
/// - `list` / `tuple`: delegated to `classify_list_elements_py`.
/// - `identifier`, `attribute`, `subscript`, `call`: `UserVariable`.
/// - `lambda`: `FunctionLike`.
/// - `parenthesized_expression` / `await`: the classification of the
///   first named child (`Unknown` if there is none).
/// - `conditional_expression`: strongest classification of the two
///   result branches; the condition itself is not an argument value
///   and is ignored.
/// - anything else: `Unknown`.
// `source` is only threaded through the recursion; clippy would
// otherwise flag the parameter.
#[allow(clippy::only_used_in_recursion)]
fn classify_command_arg_python(node: tree_sitter::Node<'_>, source: &[u8]) -> CommandArgKind {
    match node.kind() {
        "string" => {
            let mut cursor = node.walk();
            let has_interpolation = node
                .children(&mut cursor)
                .any(|child| child.kind() == "interpolation");
            if has_interpolation {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "concatenated_string" => {
            let mut cursor = node.walk();
            let has_interpolated_part = node.children(&mut cursor).any(|child| {
                classify_command_arg_python(child, source) == CommandArgKind::Interpolated
            });
            if has_interpolated_part {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "binary_operator" => {
            let mut cursor = node.walk();
            // Mixed lists/tuples carry variables too: the `%` operator
            // with a tuple right-hand side and list concatenation both
            // splice those values into the final command.
            let has_dynamic_operand = node
                .children(&mut cursor)
                .filter(|child| child.is_named())
                .any(|operand| {
                    matches!(
                        classify_command_arg_python(operand, source),
                        CommandArgKind::UserVariable
                            | CommandArgKind::Interpolated
                            | CommandArgKind::Unknown
                            | CommandArgKind::MixedListVarArgv0
                            | CommandArgKind::MixedListLiteralArgv0
                    )
                });
            if has_dynamic_operand {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "list" | "tuple" => {
            let mut cursor = node.walk();
            let elements: Vec<tree_sitter::Node<'_>> = node
                .children(&mut cursor)
                .filter(|child| child.is_named())
                .collect();
            classify_list_elements_py(&elements, source)
        }
        "identifier" | "attribute" | "subscript" | "call" => CommandArgKind::UserVariable,
        "lambda" => CommandArgKind::FunctionLike,
        // Transparent wrappers: classify whatever they wrap.
        "parenthesized_expression" | "await" => {
            match (0..node.named_child_count()).find_map(|i| node.named_child(i)) {
                Some(inner) => classify_command_arg_python(inner, source),
                None => CommandArgKind::Unknown,
            }
        }
        "conditional_expression" => {
            // Ternary `a if cond else b`. The grammar exposes the three
            // operands positionally (no field names): consequence,
            // condition, alternative. Only the two branches can become
            // the argument value, so the condition (middle operand) is
            // skipped; strongest branch wins.
            let mut cursor = node.walk();
            let operands: Vec<tree_sitter::Node<'_>> = node
                .children(&mut cursor)
                .filter(|child| child.is_named() && child.kind() != "comment")
                .collect();
            // If the shape is not the expected three operands (e.g. a
            // parse error), stay conservative and combine everything.
            let has_condition = operands.len() == 3;
            let mut strongest = CommandArgKind::StaticLiteral;
            for (idx, operand) in operands.iter().enumerate() {
                if has_condition && idx == 1 {
                    continue;
                }
                let kind = classify_command_arg_python(*operand, source);
                strongest = strongest_arg_kind(strongest, kind);
            }
            strongest
        }
        _ => CommandArgKind::Unknown,
    }
}

/// Classify a Python `list`/`tuple` element vector.
///
/// - no elements, or every element is `StaticLiteral` → `StaticList`
/// - first element (argv[0]) is not `StaticLiteral` →
///   `MixedListVarArgv0`
/// - literal argv[0] but some later element is not `StaticLiteral` →
///   `MixedListLiteralArgv0`
fn classify_list_elements_py(elements: &[tree_sitter::Node<'_>], source: &[u8]) -> CommandArgKind {
    let is_literal = |element: &tree_sitter::Node<'_>| {
        matches!(
            classify_command_arg_python(*element, source),
            CommandArgKind::StaticLiteral
        )
    };
    match elements.split_first() {
        None => CommandArgKind::StaticList,
        Some((argv0, _)) if !is_literal(argv0) => CommandArgKind::MixedListVarArgv0,
        Some((_, rest)) if rest.iter().all(&is_literal) => CommandArgKind::StaticList,
        Some(_) => CommandArgKind::MixedListLiteralArgv0,
    }
}

// ---------------------------------------------------------------------------
// JavaScript / TypeScript
// ---------------------------------------------------------------------------

/// Match a JS/TS `call_expression` against the command-exec API list.
fn match_js_call<'a>(node: tree_sitter::Node<'a>, source: &'a [u8]) -> Option<CommandSite<'a>> {
    let func = node.child_by_field_name("function")?;
    let args = node.child_by_field_name("arguments")?;
    let arg_nodes = collect_named_args(args);
    let func = unwrap_callee(func);

    let api = match func.kind() {
        "identifier" => {
            // Bare `exec`, `execSync`, `spawn`, ... — likely destructured
            // from `child_process`. Treat as the corresponding API.
            match node_text(func, source)? {
                "exec" | "execAsync" => CommandApi::JsChildProcessExec,
                "execSync" => CommandApi::JsChildProcessExec,
                "execFile" | "execFileSync" => CommandApi::JsChildProcessExecFile,
                "spawn" | "spawnSync" => CommandApi::JsChildProcessSpawn,
                "fork" => CommandApi::JsChildProcessFork,
                _ => return None,
            }
        }
        "member_expression" => {
            let obj = func.child_by_field_name("object")?;
            let prop = func.child_by_field_name("property")?;
            let prop_text = node_text(prop, source)?;
            let recv = receiver_chain_label(obj, source);
            // Receiver chain may be `child_process` or any of the
            // common alias receivers. We accept:
            //   - `child_process.X(...)`
            //   - `cp.X(...)` (common alias)
            //   - `this.cp.X(...)` (member-of-member; receiver_chain_label
            //     normalizes to last segment lowercase, e.g. `"cp"`)
            //   - `require('child_process').X(...)` (audit B1)
            //   - `(await import('child_process')).X(...)` (audit B1)
            //
            // Audit B9: `process` was previously accepted as an alias
            // here, but `process.exec` / `process.spawn` are not real
            // Node APIs (the global `process` object has none of these
            // methods), so it produced false positives on user-defined
            // `process` objects. Removed.
            let cp_aliases = matches!(recv.as_str(), "child_process" | "cp" | "childprocess");
            let shelljs_aliases = matches!(recv.as_str(), "shelljs" | "shell" | "sh");
            if cp_aliases {
                match prop_text {
                    "exec" | "execAsync" | "execSync" => CommandApi::JsChildProcessExec,
                    "execFile" | "execFileSync" => CommandApi::JsChildProcessExecFile,
                    "spawn" | "spawnSync" => CommandApi::JsChildProcessSpawn,
                    "fork" => CommandApi::JsChildProcessFork,
                    _ => return None,
                }
            } else if shelljs_aliases && prop_text == "exec" {
                CommandApi::JsShellJsExec
            } else {
                return None;
            }
        }
        _ => return None,
    };

    // Audit B6: `child_process.spawn('cmd', args, { shell: true })` (and
    // execFile/spawnSync/execFileSync with the same options object) is
    // semantically equivalent to `exec` because the platform shell still
    // interprets argv[0] AND any args appended on the same shell line.
    // Promote to the always-shell tag so the severity table treats
    // argv[0] correctly.
    let shell_option_true = matches!(
        api,
        CommandApi::JsChildProcessSpawn | CommandApi::JsChildProcessExecFile
    ) && js_spawn_options_shell_true(&arg_nodes, source);
    let api = if shell_option_true {
        CommandApi::JsChildProcessExec
    } else {
        api
    };

    // Detect the canonical shell-c form
    // `child_process.spawn("sh", ["-c", userInput])` (or `execFile` /
    // `spawnSync` / `execFileSync` variants). argv[0] is a string-
    // literal shell binary and the args array starts with `-c` and a
    // variable element. The shell interprets argv[1+] as a command
    // line — a shell-injection sink. Reclassify to `JsSpawnShellC`
    // (always Critical). Mirrors `match_go_call`'s shell-c branch and
    // the Python equivalent.
    if matches!(
        api,
        CommandApi::JsChildProcessSpawn | CommandApi::JsChildProcessExecFile
    ) {
        if let Some(site) = js_detect_shell_c(node, &arg_nodes, source) {
            return Some(site);
        }
    }

    let first = arg_nodes.first().copied()?;
    let arg_kind_first = classify_command_arg_js(first, source);
    // When shell:true is in effect with a literal command string, the
    // argv array (arg[1]) is concatenated onto the shell line — a
    // variable-elements argv is a shell-injection sink in its own right.
    // Promote the classification accordingly.
    let arg_kind = if shell_option_true && matches!(arg_kind_first, CommandArgKind::StaticLiteral) {
        match arg_nodes.get(1).copied() {
            Some(second) => match classify_command_arg_js(second, source) {
                CommandArgKind::StaticLiteral | CommandArgKind::StaticList => arg_kind_first,
                other => other,
            },
            None => arg_kind_first,
        }
    } else {
        arg_kind_first
    };

    // B15: capture verbatim text of static-literal commands for the
    // shell-metachar severity bump.
    let target_text = if matches!(arg_kind, CommandArgKind::StaticLiteral) {
        node_text(first, source).map(|s| s.to_string())
    } else {
        None
    };

    Some(CommandSite {
        call_node: node,
        api,
        arg_kind,
        target_text,
    })
}

/// Detect the JS `child_process.spawn("sh", ["-c", USER_VAR])` shape.
///
/// Requirements on the call's argument list:
///   - arg[0] is a string-literal shell-binary name
///   - arg[1] is an array literal whose first element is a literal
///     shell "run this command" flag (`"-c"`, or `/c`/`-Command` for
///     Windows, as decided by `is_shell_c_flag`)
///   - at least one array element after the flag is non-literal
///
/// On a match, returns a `CommandSite` tagged `JsSpawnShellC` (always
/// Critical via the severity table); otherwise `None`. Mirrors
/// `python_detect_shell_c` and the Go shell-c block in `match_go_call`.
fn js_detect_shell_c<'a>(
    node: tree_sitter::Node<'a>,
    arg_nodes: &[tree_sitter::Node<'a>],
    source: &'a [u8],
) -> Option<CommandSite<'a>> {
    let binary = js_string_literal_value(*arg_nodes.first()?, source)?;
    if !is_shell_binary_name(binary.as_str()) {
        return None;
    }

    let argv = *arg_nodes.get(1)?;
    if argv.kind() != "array" {
        return None;
    }

    let mut walker = argv.walk();
    let mut items = argv.children(&mut walker).filter(|item| item.is_named());

    // An empty array has no flag element and cannot match.
    let flag_node = items.next()?;
    let flag = js_string_literal_value(flag_node, source)?;
    if !is_shell_c_flag(flag.as_str()) {
        return None;
    }

    // Everything after the flag being static means there is no
    // injection sink to report.
    let has_dynamic_payload = items.any(|item| {
        !matches!(
            classify_command_arg_js(item, source),
            CommandArgKind::StaticLiteral | CommandArgKind::FunctionLike
        )
    });
    if !has_dynamic_payload {
        return None;
    }

    Some(CommandSite {
        call_node: node,
        api: CommandApi::JsSpawnShellC,
        // `JsSpawnShellC` is always Critical in the severity table
        // regardless of arg_kind; this is simply the most descriptive
        // shape.
        arg_kind: CommandArgKind::MixedListVarArgv0,
        target_text: None,
    })
}

/// Inspect a JS/TS spawn-style call's argument list for an options
/// object that enables the shell (`shell: true`, or any `shell` value
/// that is not the literal `false`). Mirrors
/// `python_subprocess_shell_true` for the JS option-object pattern.
///
/// Every `object` argument is examined, last argument first (the
/// options bag is typically last). Within an object the first `shell`
/// member decides the result:
///
/// - `shell: true` → `true`
/// - `shell: false` → `false`
/// - `shell: <anything else>` (e.g. an identifier we can't resolve, or
///   a shell path string) → `true`, conservatively
/// - shorthand `{ shell }` → `true`; the value is a variable we can't
///   resolve, so it gets the same conservative treatment
///
/// Returns `false` when no object argument has a `shell` member.
fn js_spawn_options_shell_true(args: &[tree_sitter::Node<'_>], source: &[u8]) -> bool {
    for arg in args.iter().rev().filter(|arg| arg.kind() == "object") {
        let mut cursor = arg.walk();
        for member in arg.children(&mut cursor) {
            match member.kind() {
                // `{ shell }` is not a `pair` node; it carries only the
                // identifier, whose value is unknown here.
                "shorthand_property_identifier" => {
                    if node_text(member, source) == Some("shell") {
                        return true;
                    }
                }
                "pair" => {
                    let key = match member.child_by_field_name("key") {
                        Some(key) => key,
                        None => continue,
                    };
                    // key may be `property_identifier`, `string`, or
                    // `computed_property_name`; accept the first two.
                    let key_text = match key.kind() {
                        "property_identifier" => node_text(key, source).map(String::from),
                        "string" => js_string_literal_value(key, source),
                        _ => None,
                    };
                    if key_text.as_deref() != Some("shell") {
                        continue;
                    }
                    let value = match member.child_by_field_name("value") {
                        Some(value) => value,
                        None => continue,
                    };
                    // Only the literal `false` disables the shell;
                    // anything unknown is assumed truthy.
                    return value.kind() != "false";
                }
                _ => {}
            }
        }
        // This object had no `shell` member. Keep scanning earlier
        // arguments in case the options bag is not the last one.
    }
    false
}

/// Classify the shape of a JS/TS command-call argument.
///
/// - `string`: `StaticLiteral`.
/// - `template_string`: `Interpolated` when it has a
///   `template_substitution` child, otherwise `StaticLiteral`.
/// - `binary_expression`: `Interpolated` when either operand is
///   `UserVariable`, `Interpolated` or `Unknown`; otherwise
///   `StaticLiteral`.
/// - `array`: delegated to `classify_list_elements_js`.
/// - identifiers, member/subscript accesses and calls: `UserVariable`.
/// - function-valued expressions: `FunctionLike`.
/// - parentheses, `await` and the TypeScript cast/assertion wrappers:
///   the classification of the first named child (`Unknown` if none).
/// - `ternary_expression`: strongest classification of the two
///   branches.
/// - anything else: `Unknown`.
// `source` is only threaded through the recursion; clippy would
// otherwise flag the parameter.
#[allow(clippy::only_used_in_recursion)]
fn classify_command_arg_js(node: tree_sitter::Node<'_>, source: &[u8]) -> CommandArgKind {
    match node.kind() {
        "string" => CommandArgKind::StaticLiteral,
        "template_string" => {
            let mut walker = node.walk();
            let has_substitution = node
                .children(&mut walker)
                .any(|part| part.kind() == "template_substitution");
            if has_substitution {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "binary_expression" => {
            let has_dynamic_operand = ["left", "right"]
                .iter()
                .filter_map(|field| node.child_by_field_name(*field))
                .any(|operand| {
                    matches!(
                        classify_command_arg_js(operand, source),
                        CommandArgKind::UserVariable
                            | CommandArgKind::Interpolated
                            | CommandArgKind::Unknown
                    )
                });
            if has_dynamic_operand {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "array" => {
            let mut walker = node.walk();
            let elements: Vec<tree_sitter::Node<'_>> = node
                .children(&mut walker)
                .filter(|item| item.is_named())
                .collect();
            classify_list_elements_js(&elements, source)
        }
        "identifier" | "member_expression" | "subscript_expression" | "call_expression" => {
            CommandArgKind::UserVariable
        }
        "arrow_function" | "function_expression" | "function" | "function_declaration" => {
            CommandArgKind::FunctionLike
        }
        // Transparent wrappers: classify whatever they wrap.
        "parenthesized_expression"
        | "await_expression"
        | "as_expression"
        | "type_assertion_expression"
        | "non_null_expression"
        | "satisfies_expression" => {
            match (0..node.named_child_count()).find_map(|i| node.named_child(i)) {
                Some(inner) => classify_command_arg_js(inner, source),
                None => CommandArgKind::Unknown,
            }
        }
        "ternary_expression" => ["consequence", "alternative"]
            .iter()
            .filter_map(|field| node.child_by_field_name(*field))
            .fold(CommandArgKind::StaticLiteral, |strongest, branch| {
                strongest_arg_kind(strongest, classify_command_arg_js(branch, source))
            }),
        _ => CommandArgKind::Unknown,
    }
}

/// Classify the elements of a JS/TS array literal.
///
/// - no elements, or every element is `StaticLiteral` → `StaticList`
/// - first element is not `StaticLiteral` → `MixedListVarArgv0`
/// - literal first element but some later element is not
///   `StaticLiteral` → `MixedListLiteralArgv0`
fn classify_list_elements_js(elements: &[tree_sitter::Node<'_>], source: &[u8]) -> CommandArgKind {
    let is_literal = |element: &tree_sitter::Node<'_>| {
        matches!(
            classify_command_arg_js(*element, source),
            CommandArgKind::StaticLiteral
        )
    };
    match elements.split_first() {
        None => CommandArgKind::StaticList,
        Some((head, _)) if !is_literal(head) => CommandArgKind::MixedListVarArgv0,
        Some((_, tail)) if tail.iter().all(&is_literal) => CommandArgKind::StaticList,
        Some(_) => CommandArgKind::MixedListLiteralArgv0,
    }
}

// ---------------------------------------------------------------------------
// Go
// ---------------------------------------------------------------------------

/// Match a Go `call_expression` against `exec.Command` /
/// `exec.CommandContext` / `syscall.Exec` / `syscall.StartProcess`.
///
/// The shell-injection form `exec.Command("sh", "-c", USER_VAR)` is
/// recognized by inspecting arg[0] (literal shell name), arg[1]
/// (literal `-c` or `/c`) and arg[2] (variable). Detected this way, the
/// site fires at Critical even though arg[0] is a static literal, because
/// the dangerous part is arg[2].
fn match_go_call<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
    aliases: &GoImportAliases,
) -> Option<CommandSite<'a>> {
    let func = node.child_by_field_name("function")?;
    if func.kind() != "selector_expression" {
        return None;
    }
    let operand = func.child_by_field_name("operand")?;
    let field = func.child_by_field_name("field")?;
    let raw_operand_label = receiver_chain_label_go(operand, source);
    // Resolve aliases from `import e "os/exec"` etc to their canonical
    // package label so the match arms below stay simple.
    let operand_label = aliases.canonical(&raw_operand_label);
    let field_text = node_text(field, source)?;
    let args = node.child_by_field_name("arguments")?;
    let arg_nodes = collect_named_args(args);

    // `exec.Command(name, args...)`: name is arg[0].
    // `exec.CommandContext(ctx, name, args...)`: name is arg[1].
    // `syscall.Exec(path, argv, envv)`: path is arg[0].
    // `syscall.StartProcess(name, argv, attr)`: name is arg[0].
    let (api_base, name_idx) = match (operand_label, field_text) {
        ("exec", "Command") => (CommandApi::GoExecCommand, 0usize),
        ("exec", "CommandContext") => (CommandApi::GoExecCommand, 1usize),
        ("syscall", "Exec" | "StartProcess") => (CommandApi::GoSyscallExec, 0usize),
        _ => return None,
    };

    // Detect the canonical shell-c form: arg[name_idx] is a literal
    // shell name, arg[name_idx+1] is a literal `-c` / `/c`, and there's
    // at least one further arg that is variable.
    if api_base == CommandApi::GoExecCommand {
        if let (Some(name_node), Some(flag_node)) =
            (arg_nodes.get(name_idx), arg_nodes.get(name_idx + 1))
        {
            let name_lit = go_string_literal_value(*name_node, source);
            let flag_lit = go_string_literal_value(*flag_node, source);
            if let (Some(name), Some(flag)) = (name_lit.as_deref(), flag_lit.as_deref()) {
                if is_shell_binary_name(name) && is_shell_c_flag(flag) {
                    if let Some(rest) = arg_nodes.get(name_idx + 2) {
                        let kind = classify_command_arg_go(*rest, source);
                        if !matches!(
                            kind,
                            CommandArgKind::StaticLiteral | CommandArgKind::FunctionLike
                        ) {
                            return Some(CommandSite {
                                call_node: node,
                                api: CommandApi::GoExecCommandShellC,
                                arg_kind: kind,
                                // GoExecCommandShellC is always Critical
                                // regardless of metachars; no need to
                                // capture the literal here.
                                target_text: None,
                            });
                        }
                    }
                }
            }
        }
    }

    // General form: classify the "name" argument.
    let target = arg_nodes.get(name_idx).copied()?;
    let arg_kind = classify_command_arg_go(target, source);
    // B15: capture verbatim text of static-literal commands for the
    // shell-metachar severity bump.
    let target_text = if matches!(arg_kind, CommandArgKind::StaticLiteral) {
        node_text(target, source).map(|s| s.to_string())
    } else {
        None
    };

    Some(CommandSite {
        call_node: node,
        api: api_base,
        arg_kind,
        target_text,
    })
}

/// Return the text of a Go `interpreted_string_literal` /
/// `raw_string_literal`. Yields `None` for nodes that are not string
/// literals (after unwrapping parentheses).
///
/// Audit B5: `parenthesized_expression` is descended into so that
/// `exec.Command(("sh"), "-c", userInput)` is still classified as a
/// shell-c form. Dropping the parenthesized shell name used to degrade
/// the finding to general-form Low, which then got filtered.
fn go_string_literal_value(node: tree_sitter::Node<'_>, source: &[u8]) -> Option<String> {
    match node.kind() {
        "interpreted_string_literal" | "raw_string_literal" => {
            // Prefer a dedicated content child when the grammar
            // provides one; the first such child supplies the value.
            let mut walker = node.walk();
            let content = node.children(&mut walker).find(|child| {
                matches!(
                    child.kind(),
                    "interpreted_string_literal_content"
                        | "raw_string_literal_content"
                        | "string_content"
                )
            });
            if let Some(content) = content {
                return node_text(content, source).map(String::from);
            }

            // No content child: strip a matching pair of outer `"` or
            // backtick delimiters, or keep the raw text if neither
            // pair is present.
            let raw = node_text(node, source)?;
            let unquoted = ['"', '`']
                .iter()
                .find_map(|quote| raw.strip_prefix(*quote)?.strip_suffix(*quote))
                .unwrap_or(raw);
            Some(unquoted.to_owned())
        }
        // Look through wrapper nodes so `("sh")` is still recognised.
        "parenthesized_expression" => (0..node.named_child_count())
            .find_map(|i| node.named_child(i))
            .and_then(|inner| go_string_literal_value(inner, source)),
        _ => None,
    }
}

/// Classify the shape of a Go command-call argument.
///
/// - string literals: `StaticLiteral`.
/// - `binary_expression`: `Interpolated` when either operand is
///   `UserVariable`, `Interpolated` or `Unknown`; otherwise
///   `StaticLiteral`.
/// - identifiers, selectors, index expressions and calls:
///   `UserVariable`.
/// - `func_literal`: `FunctionLike`.
/// - `parenthesized_expression`: the classification of the first named
///   child (`Unknown` if none).
/// - `composite_literal` (e.g. `[]string{"a", b}`): its element values
///   are gathered and handed to `classify_list_elements_go`.
/// - anything else: `Unknown`.
// `source` is only threaded through the recursion; clippy would
// otherwise flag the parameter.
#[allow(clippy::only_used_in_recursion)]
fn classify_command_arg_go(node: tree_sitter::Node<'_>, source: &[u8]) -> CommandArgKind {
    match node.kind() {
        "interpreted_string_literal" | "raw_string_literal" => CommandArgKind::StaticLiteral,
        "binary_expression" => {
            let has_dynamic_operand = ["left", "right"]
                .iter()
                .filter_map(|field| node.child_by_field_name(*field))
                .any(|operand| {
                    matches!(
                        classify_command_arg_go(operand, source),
                        CommandArgKind::UserVariable
                            | CommandArgKind::Interpolated
                            | CommandArgKind::Unknown
                    )
                });
            if has_dynamic_operand {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "identifier" | "selector_expression" | "index_expression" | "call_expression" => {
            CommandArgKind::UserVariable
        }
        "func_literal" => CommandArgKind::FunctionLike,
        "parenthesized_expression" => {
            match (0..node.named_child_count()).find_map(|i| node.named_child(i)) {
                Some(inner) => classify_command_arg_go(inner, source),
                None => CommandArgKind::Unknown,
            }
        }
        // Composite-literal `[]string{"a", b}` — treat as list.
        "composite_literal" => {
            let mut elements: Vec<tree_sitter::Node<'_>> = Vec::new();
            let mut outer = node.walk();
            // The element values live under the `literal_value` child.
            for body in node
                .children(&mut outer)
                .filter(|child| child.kind() == "literal_value")
            {
                let mut inner = body.walk();
                for entry in body.children(&mut inner).filter(|entry| entry.is_named()) {
                    match entry.kind() {
                        // `keyed_element` for indexed elements: take
                        // its value when present.
                        "keyed_element" => {
                            if let Some(value) = entry.child_by_field_name("value") {
                                elements.push(value);
                            }
                        }
                        // `literal_element` wraps the value(s).
                        "literal_element" => elements.extend(
                            (0..entry.named_child_count()).filter_map(|j| entry.named_child(j)),
                        ),
                        _ => elements.push(entry),
                    }
                }
            }
            classify_list_elements_go(&elements, source)
        }
        _ => CommandArgKind::Unknown,
    }
}

/// Classify the element values of a Go composite literal.
///
/// - no elements, or every element is `StaticLiteral` → `StaticList`
/// - first element is not `StaticLiteral` → `MixedListVarArgv0`
/// - literal first element but some later element is not
///   `StaticLiteral` → `MixedListLiteralArgv0`
fn classify_list_elements_go(elements: &[tree_sitter::Node<'_>], source: &[u8]) -> CommandArgKind {
    let is_literal = |element: &tree_sitter::Node<'_>| {
        matches!(
            classify_command_arg_go(*element, source),
            CommandArgKind::StaticLiteral
        )
    };
    match elements.split_first() {
        None => CommandArgKind::StaticList,
        Some((head, _)) if !is_literal(head) => CommandArgKind::MixedListVarArgv0,
        Some((_, tail)) if tail.iter().all(&is_literal) => CommandArgKind::StaticList,
        Some(_) => CommandArgKind::MixedListLiteralArgv0,
    }
}

// `receiver_chain_label_go` lives in `ast_helpers`; imported above.

/// Alias table for the Go packages this detector cares about.
///
/// Audit B2 fix: `import e "os/exec"` followed by `e.Command(b)` used
/// to be silently missed because the matcher compared the receiver
/// text literally. The file's `import_declaration` nodes are now
/// scanned once to build an alias→canonical-package map, so any
/// non-canonical receiver name maps back to `exec` / `syscall`.
#[derive(Debug, Default, Clone)]
struct GoImportAliases {
    /// Identifier names to treat as the `os/exec` package: the
    /// canonical `"exec"` (seeded by `collect_go_import_aliases`) plus
    /// any alias from `import alias "os/exec"`.
    exec_aliases: std::collections::HashSet<String>,
    /// Identifier names to treat as the `syscall` package, built the
    /// same way.
    syscall_aliases: std::collections::HashSet<String>,
}

impl GoImportAliases {
    /// Map a (lowercased) receiver label to its canonical Go package
    /// name: `"exec"` or `"syscall"` for a known alias, otherwise the
    /// label itself, unchanged. The `exec` set is consulted first.
    fn canonical<'s>(&self, label: &'s str) -> &'s str {
        let is_exec = self.exec_aliases.contains(label);
        let is_syscall = self.syscall_aliases.contains(label);
        match (is_exec, is_syscall) {
            (true, _) => "exec",
            (false, true) => "syscall",
            (false, false) => label,
        }
    }
}

/// Walk the Go file root and collect the local names bound to the
/// dangerous packages (`os/exec`, `syscall`).
///
/// The returned map always contains the canonical names `exec` and
/// `syscall`; every `import_spec` for one of those packages adds its
/// local name (the explicit alias, or the last path segment when no
/// alias is given), lowercased. Blank (`_`) and dot (`.`) imports are
/// ignored because they introduce no receiver name.
///
/// Import paths may be written as interpreted (`"os/exec"`) or raw
/// (`` `os/exec` ``) string literals; both are unquoted before matching.
fn collect_go_import_aliases(root: tree_sitter::Node<'_>, source: &[u8]) -> GoImportAliases {
    let mut out = GoImportAliases::default();
    // Canonical package names always resolve to themselves.
    out.exec_aliases.insert("exec".to_string());
    out.syscall_aliases.insert("syscall".to_string());

    /// Depth-limited descent that hands every `import_spec` to
    /// `handle_import_spec`.
    fn visit(node: tree_sitter::Node<'_>, source: &[u8], out: &mut GoImportAliases, depth: u8) {
        // Imports are always at file scope; bail past a reasonable
        // descent depth to avoid walking the whole file.
        if depth > 4 {
            return;
        }
        if node.kind() == "import_spec" {
            handle_import_spec(node, source, out);
            return;
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            visit(child, source, out, depth + 1);
        }
    }

    /// Record the local name of one `import_spec` if it imports a
    /// dangerous package.
    fn handle_import_spec(node: tree_sitter::Node<'_>, source: &[u8], out: &mut GoImportAliases) {
        let Some(path) = node.child_by_field_name("path") else {
            return;
        };
        let Some(raw) = node_text(path, source) else {
            return;
        };
        // Strip the outer quotes. Go allows an import path to be either an
        // interpreted string (`"os/exec"`) or a raw string (`` `os/exec` ``).
        let pkg_path = raw
            .strip_prefix('"')
            .and_then(|s| s.strip_suffix('"'))
            .or_else(|| raw.strip_prefix('`').and_then(|s| s.strip_suffix('`')))
            .unwrap_or(raw);
        // Only the dangerous packages are tracked; skip everything else
        // before doing any further work.
        let aliases = match pkg_path {
            "os/exec" => &mut out.exec_aliases,
            "syscall" => &mut out.syscall_aliases,
            _ => return,
        };
        // Determine the local name. If there's a `name` field, use
        // that; otherwise default to the last segment of the path.
        let local = match node.child_by_field_name("name") {
            Some(name) => node_text(name, source),
            None => pkg_path.rsplit('/').next(),
        };
        match local {
            Some(name) if !name.is_empty() && name != "_" && name != "." => {
                aliases.insert(name.to_lowercase());
            }
            _ => {}
        }
    }

    visit(root, source, &mut out, 0);
    out
}

// ---------------------------------------------------------------------------
// Generic helpers (mirrored from eval_detector)
// ---------------------------------------------------------------------------

// `collect_named_args` and `unwrap_callee` live in `ast_helpers`; imported
// at the top of the module.

/// Lowercased "receiver label" for a JS/TS member-call receiver.
/// Mirrors `eval_detector::receiver_chain_label`.
///
/// Audit B1 extension: when the receiver is itself a `call_expression`
/// of `require('child_process')` (or `import('child_process')` inside an
/// `await_expression`), return the canonical `"child_process"` label so
/// that `require('child_process').exec(userInput)` is recognised as
/// `child_process.exec(...)`. This is the single most idiomatic Node
/// shape and was silently missed in the AST migration.
///
/// Implementation: this function contains no logic of its own. It
/// delegates to the shared
/// [`receiver_chain_label`](crate::detectors::security::ast_helpers::receiver_chain_label)
/// passing this detector's local [`call_expression_module_label`] as the
/// resolver — that's the only piece that varies between detectors (it
/// names the dangerous module(s) for *this* detector, e.g.
/// `child_process`).
///
/// `node` is the receiver node to label and `source` is the file's raw
/// bytes; both are forwarded unchanged to the shared helper, whose
/// result is returned as-is.
fn receiver_chain_label(node: tree_sitter::Node<'_>, source: &[u8]) -> String {
    receiver_chain_label_shared(node, source, Some(&call_expression_module_label))
}

/// Resolve `require('MODULE')` / `import('MODULE')` to a canonical,
/// lowercased module label. Any other call shape yields `None`.
///
/// The callee must be a node of kind `identifier` or `import` whose text
/// is exactly `require` or `import`, and the first argument must be a
/// plain quoted string literal. Only `child_process` (optionally with
/// the `node:` prefix) is mapped; every other module returns `None`.
///
/// `node` is expected to be a `call_expression` (checked in debug
/// builds only).
fn call_expression_module_label(
    node: tree_sitter::Node<'_>,
    source: &[u8],
) -> Option<&'static str> {
    debug_assert_eq!(node.kind(), "call_expression");

    let callee = node.child_by_field_name("function")?;
    let callee_text = node_text(callee, source)?;
    let kind_ok = matches!(callee.kind(), "identifier" | "import");
    let name_ok = matches!(callee_text, "require" | "import");
    if !(kind_ok && name_ok) {
        return None;
    }

    let arguments = node.child_by_field_name("arguments")?;
    let module = collect_named_args(arguments)
        .first()
        .and_then(|arg| js_string_literal_value(*arg, source))?;

    // Map known dangerous modules to their canonical label.
    matches!(module.as_str(), "child_process" | "node:child_process").then_some("child_process")
}

/// Return the text inside a JS/TS string literal, without its quotes.
///
/// Only nodes of kind `string` are accepted, so template strings (with
/// or without substitutions) yield `None`.
///
/// The value is the in-order concatenation of the node's
/// `string_fragment` children. When there are none (for instance the
/// empty literal `""`), the node's full text is used with one matching
/// pair of outer `"` or `'` quotes removed; if neither pair matches the
/// result is `None`.
fn js_string_literal_value(node: tree_sitter::Node<'_>, source: &[u8]) -> Option<String> {
    if node.kind() != "string" {
        return None;
    }

    // tree-sitter-javascript exposes `string_fragment` children for the
    // raw text of a string literal; join them in document order.
    let mut walker = node.walk();
    let joined = node
        .children(&mut walker)
        .filter(|child| child.kind() == "string_fragment")
        .filter_map(|child| node_text(child, source))
        .fold(None, |acc: Option<String>, piece| {
            let mut text = acc.unwrap_or_default();
            text.push_str(piece);
            Some(text)
        });
    if joined.is_some() {
        return joined;
    }

    // No fragments: fall back to stripping the outer quotes by hand.
    let full = node_text(node, source)?;
    let unquoted = match full.strip_prefix('"').and_then(|rest| rest.strip_suffix('"')) {
        Some(inner) => inner,
        None => full
            .strip_prefix('\'')
            .and_then(|rest| rest.strip_suffix('\''))?,
    };
    Some(unquoted.to_string())
}

/// Pick whichever of two `CommandArgKind`s carries the stronger signal.
///
/// Strength order, strongest first: `MixedListVarArgv0`, `UserVariable`,
/// `Interpolated`, `MixedListLiteralArgv0`, `Unknown`, `FunctionLike`,
/// then `StaticList` / `StaticLiteral` (equal). On a tie the first
/// argument `a` is returned.
fn strongest_arg_kind(a: CommandArgKind, b: CommandArgKind) -> CommandArgKind {
    /// Numeric strength of a kind; larger means a stronger signal.
    fn strength(kind: CommandArgKind) -> u8 {
        match kind {
            CommandArgKind::StaticList | CommandArgKind::StaticLiteral => 0,
            CommandArgKind::FunctionLike => 1,
            CommandArgKind::Unknown => 2,
            CommandArgKind::MixedListLiteralArgv0 => 3,
            CommandArgKind::Interpolated => 4,
            CommandArgKind::UserVariable => 5,
            CommandArgKind::MixedListVarArgv0 => 6,
        }
    }

    // `b` wins only when strictly stronger, so ties keep `a`.
    if strength(b) > strength(a) {
        b
    } else {
        a
    }
}

// `node_text` lives in `ast_helpers`; imported above.

// ---------------------------------------------------------------------------
// Line scanner (Ruby, PHP, Java, sh)
// ---------------------------------------------------------------------------

/// Recognize common command-injection forms in non-AST languages.
///
/// `line` is a single source line and `ext` the file extension that
/// selects the rule set:
///
/// * `"rb"` — a backtick command string (interpolated with `#{...}` or
///   static), otherwise a bare `system(...)` / `exec(...)` call.
/// * `"php"` — a bare call to `system`, `shell_exec`, `passthru`,
///   `proc_open`, `popen` or `exec`.
/// * `"java"` — `Runtime.getRuntime().exec(...)` or
///   `new ProcessBuilder(...)`.
/// * anything else (including `"sh"`) — never matches.
///
/// Returns `(api, classified-arg-shape)` if the line looks like one of
/// these forms, `None` otherwise. The argument shape comes from
/// `classify_line_arg` applied to the text right after the opening
/// parenthesis.
fn match_line_command(line: &str, ext: &str) -> Option<(CommandApi, CommandArgKind)> {
    static RUBY_BACKTICK_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"`[^`\n]*`").expect("valid regex"));
    // Audit B4: require a non-member-access prefix before the function
    // name so `obj.system(x)` (a method call) does not fire as the
    // bare-global `system(x)`. The leading `(?:^|[^.>])` guards against
    // `obj.system` and `$obj->system`; it is shared by the Ruby and PHP
    // patterns. The PHP list includes `popen` (audit B8).
    static RUBY_SYSTEM_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"(?:^|[^.>])\b(system|exec)\s*\(").expect("valid regex"));
    static PHP_SHELL_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?:^|[^.>])\b(system|shell_exec|passthru|proc_open|popen|exec)\s*\(")
            .expect("valid regex")
    });
    static JAVA_RUNTIME_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(Runtime\.getRuntime\(\)\.exec|new\s+ProcessBuilder)\s*\(")
            .expect("valid regex")
    });

    match ext {
        "rb" => {
            // Backtick string with `#{...}` interpolation → Critical.
            if let Some(m) = RUBY_BACKTICK_RE.find(line) {
                let shape = if m.as_str().contains("#{") {
                    CommandArgKind::Interpolated
                } else {
                    CommandArgKind::StaticLiteral
                };
                return Some((CommandApi::PyOsSystem, shape));
            }
            // `system(...)` / `exec(...)` — line-level arg-shape probe.
            // Ruby `system`/`exec` are shell-aware in the single-string
            // form. We model as PyOsSystem (always-shell) for severity
            // purposes — it is the closest analog.
            RUBY_SYSTEM_RE
                .find(line)
                .map(|m| (CommandApi::PyOsSystem, classify_line_arg(&line[m.end()..])))
        }
        "php" => {
            let caps = PHP_SHELL_RE.captures(line)?;
            let whole = caps.get(0)?;
            // Decide the API from the function the regex actually matched,
            // not from a substring search over the whole line: the latter
            // misses `shell_exec (` (whitespace before the paren) and
            // mislabels lines where another function matched first.
            let matched_fn = caps.get(1).map_or("", |g| g.as_str());
            let api = if matched_fn == "shell_exec" {
                CommandApi::JsShellJsExec
            } else {
                CommandApi::PyOsSystem
            };
            Some((api, classify_line_arg(&line[whole.end()..])))
        }
        "java" => JAVA_RUNTIME_RE
            .find(line)
            .map(|m| (CommandApi::PyOsSystem, classify_line_arg(&line[m.end()..]))),
        // `sh` and every other extension have no line-level rules.
        _ => None,
    }
}

/// Cheap line-text classification of a command argument for the
/// non-AST languages.
///
/// `after_paren` is the rest of the line following the call's opening
/// parenthesis. After skipping leading whitespace:
///
/// * `[` or `array(` → `MixedListLiteralArgv0` (array form; contents are
///   not inspected).
/// * a `"` or `'` quoted string → `Interpolated` if a double-quoted
///   string contains `#{` or `$`, or if the closing quote is followed by
///   `+` or `.`; otherwise `StaticLiteral`.
/// * `)` → `Unknown` (no argument).
/// * anything else (identifier, member access, superglobal, or nothing
///   at all) → `UserVariable`.
///
/// A string whose closing quote is not on this line is classified from
/// what is visible; it never panics.
fn classify_line_arg(after_paren: &str) -> CommandArgKind {
    let trimmed = after_paren.trim_start();
    if trimmed.starts_with('[') || trimmed.starts_with("array(") {
        // Array form. We can't easily classify content here; assume
        // mixed shape with literal argv[0].
        return CommandArgKind::MixedListLiteralArgv0;
    }

    let bytes = trimmed.as_bytes();
    let quote = match bytes.first().copied() {
        Some(q) if q == b'"' || q == b'\'' => q,
        Some(b')') => return CommandArgKind::Unknown,
        // Identifier / member access / superglobal (`$_GET[...]`).
        _ => return CommandArgKind::UserVariable,
    };

    // Scan for the closing quote, honouring backslash escapes. `i` ends
    // on the closing quote, or at/after `bytes.len()` when the string is
    // not terminated on this line.
    let mut i = 1;
    let mut had_interp = false;
    while let Some(&c) = bytes.get(i) {
        if c == b'\\' {
            i += 2;
            continue;
        }
        if c == quote {
            break;
        }
        if quote == b'"' {
            // Ruby `"#{...}"`.
            let ruby_interp = c == b'#' && bytes.get(i + 1) == Some(&b'{');
            // PHP `"$var"` and `"{$var}"`.
            let php_interp = c == b'$';
            if ruby_interp || php_interp {
                had_interp = true;
            }
        }
        i += 1;
    }

    // Then check for trailing `+` or `.` concat — text-level signal
    // that the arg is built from a literal plus a variable. Use a checked
    // slice: for an unterminated string `i + 1` lies past the end, and
    // plain indexing would panic.
    let tail = bytes
        .get(i + 1..)
        .and_then(|rest| std::str::from_utf8(rest).ok())
        .unwrap_or("")
        .trim_start();
    let concat = tail.starts_with('+') || tail.starts_with('.');

    if had_interp || concat {
        CommandArgKind::Interpolated
    } else {
        CommandArgKind::StaticLiteral
    }
}

/// Returns the list of shell metacharacters present in a static-literal
/// command string. Used to bump severity for literals that look static
/// but contain command-chaining / substitution operators (audit B15).
///
/// Only the metacharacters that change control-flow or invoke a
/// subshell are listed; quoting and globbing are excluded because they
/// don't, on their own, escalate a literal to an injection vector.
///
/// The result keeps a fixed order: `;`, `&&`, `||`, `|`, `` ` ``, `$(`,
/// `>`, `<(`, `>(`. Overlapping operators are told apart by position:
/// `|` is reported only for a pipe that is not part of `||`, and `>`
/// only for a `>` that does not open a `>(` process substitution — so a
/// command containing both `|` and `||` reports both.
fn shell_metachars_in(s: &str) -> Vec<&'static str> {
    let bytes = s.as_bytes();
    // All operators are ASCII, so byte-wise neighbour checks are safe on
    // UTF-8 input (ASCII bytes never occur inside a multi-byte sequence).
    let next_is = |i: usize, wanted: u8| bytes.get(i + 1) == Some(&wanted);

    let lone_pipe = bytes
        .iter()
        .enumerate()
        .any(|(i, &b)| b == b'|' && !next_is(i, b'|') && (i == 0 || bytes[i - 1] != b'|'));
    let plain_redirect = bytes
        .iter()
        .enumerate()
        .any(|(i, &b)| b == b'>' && !next_is(i, b'('));

    let checks: [(&'static str, bool); 9] = [
        (";", s.contains(';')),
        ("&&", s.contains("&&")),
        ("||", s.contains("||")),
        ("|", lone_pipe),
        ("`", s.contains('`')),
        ("$(", s.contains("$(")),
        (">", plain_redirect),
        ("<(", s.contains("<(")),
        (">(", s.contains(">(")),
    ];
    checks
        .into_iter()
        .filter(|&(_, present)| present)
        .map(|(token, _)| token)
        .collect()
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::base::Detector;
    use crate::graph::builder::GraphBuilder;

    // -------------------------------------------------------------
    // Pre-existing tests (preserved as smoke / shape regression).
    // -------------------------------------------------------------

    #[test]
    fn test_detects_os_system_with_user_input() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: a parameter concatenated into the `os.system` command.
        let source_text =
            "import os\n\ndef run_command(user_input):\n    os.system(\"ls \" + user_input)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("vuln.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "Should detect os.system with user input concatenation"
        );

        let mentions_injection = reported
            .iter()
            .any(|f| f.title.to_lowercase().contains("command injection"));
        assert!(
            mentions_injection,
            "Finding should mention command injection. Titles: {:?}",
            titles
        );

        let carries_cwe = reported
            .iter()
            .any(|f| matches!(f.cwe_id.as_deref(), Some("CWE-78")));
        assert!(carries_cwe, "Finding should have CWE-78");
    }

    #[test]
    fn test_no_findings_for_safe_subprocess() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: `subprocess.run` with an all-literal argv list.
        let source_text = "import subprocess\n\ndef list_files():\n    result = subprocess.run([\"ls\", \"-la\"], capture_output=True)\n    return result.stdout\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("safe.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            reported.is_empty(),
            "Safe subprocess usage with list args should have no findings, but got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_subprocess_shell_true_python() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: concatenated command string passed with `shell=True`.
        let source_text = "import subprocess\n\ndef execute(user_input):\n    subprocess.call(\"grep \" + user_input, shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("run.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        assert!(
            !reported.is_empty(),
            "Should detect subprocess.call with shell=True and user input"
        );

        let carries_cwe = reported
            .iter()
            .any(|f| matches!(f.cwe_id.as_deref(), Some("CWE-78")));
        assert!(carries_cwe, "Finding should have CWE-78");
    }

    #[test]
    fn test_detects_child_process_exec_with_template_js() {
        use crate::detectors::analysis_context::AnalysisContext;

        // JavaScript: template literal with `${...}` passed to
        // `child_process.exec`.
        let source_text = "const { exec } = require('child_process');\n\nfunction runCommand(req, res) {\n    const userId = req.params.id;\n    child_process.exec(`find /data -user ${userId}`);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "Should detect child_process.exec with template literal interpolation"
        );

        let mentions_injection = reported
            .iter()
            .any(|f| f.title.to_lowercase().contains("command injection"));
        assert!(
            mentions_injection,
            "Finding should mention command injection. Titles: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_exec_in_comment() {
        use crate::detectors::analysis_context::AnalysisContext;

        // JavaScript: the only mention of a command API is inside a `//`
        // comment.
        let source_text = "// Dangerous example: os.system(user_input) - never do this\nfunction safeFunc() {\n    return 42;\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("safe.js", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            reported.is_empty(),
            "os.system in a comment should not produce findings, but got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_go_exec_command() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Go: `exec.Command` called with a value read from `r.FormValue`.
        let source_text = "package main\n\nimport (\n\t\"os/exec\"\n\t\"net/http\"\n)\n\nfunc runCmd(w http.ResponseWriter, r *http.Request) {\n\tcmd := r.FormValue(\"command\")\n\texec.Command(cmd)\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("handler.go", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "Should detect exec.Command with user input from r.FormValue. Got: {:?}",
            titles
        );

        let names_api = reported
            .iter()
            .any(|f| f.title.to_lowercase().contains("exec.command"));
        assert!(
            names_api,
            "Finding should mention exec.Command. Titles: {:?}",
            titles
        );
    }

    // =================================================================
    // Cohort 1 — currently-passing-shape (must remain green post-migration)
    // =================================================================

    #[test]
    fn test_detects_subprocess_run_with_shell_true_python() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: a bare parameter passed to `subprocess.run(..., shell=True)`.
        let source_text = "import subprocess\n\ndef run(user_input):\n    subprocess.run(user_input, shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("vuln.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "Should detect subprocess.run with shell=True and user input. Got: {:?}",
            titles
        );

        let carries_cwe = reported
            .iter()
            .any(|f| matches!(f.cwe_id.as_deref(), Some("CWE-78")));
        assert!(carries_cwe, "Finding should carry CWE-78");
    }

    #[test]
    fn test_detects_os_system_python() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: a bare parameter passed straight to `os.system`.
        let source_text = "import os\n\ndef run(user_input):\n    os.system(user_input)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("vuln.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "Should detect os.system(user_input). Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_child_process_exec_javascript() {
        use crate::detectors::analysis_context::AnalysisContext;

        // JavaScript: request-body field passed to `child_process.exec`.
        let source_text = "const child_process = require('child_process');\n\nfunction run(req, res) {\n    child_process.exec(req.body.cmd);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "Should detect child_process.exec(req.body.cmd). Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_ruby_backtick_with_interpolation() {
        // Ruby source whose backtick command string interpolates a method
        // parameter via `#{...}`.
        let content = "def list_files(user_path)\n  result = `ls #{user_path}`\n  result\nend\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &store,
            vec![("vuln.rb", content)],
        );
        let findings = detector.detect(&ctx).expect("detection should succeed");
        // Forward-compatible regression guard — Ruby is line-path only.
        // NOTE(review): despite the test name, nothing is asserted about
        // `findings`; the test only verifies that `detect` returns `Ok`
        // for this input. Confirm whether a non-empty assertion is wanted.
        let _ = findings;
    }

    #[test]
    fn test_detects_php_system_with_user_input() {
        // PHP source passing the `$_GET['cmd']` superglobal entry directly
        // to `system(...)`.
        let content = "<?php\nfunction run() {\n    system($_GET['cmd']);\n}\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &store,
            vec![("vuln.php", content)],
        );
        let findings = detector.detect(&ctx).expect("detection should succeed");
        // NOTE(review): despite the test name, nothing is asserted about
        // `findings`; the test only verifies that `detect` returns `Ok`
        // for this input. Confirm whether a non-empty assertion is wanted.
        let _ = findings;
    }

    #[test]
    fn test_skips_command_in_comment() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: the `subprocess.run(...)` text lives only in a `#` comment.
        let source_text = "import subprocess\n\ndef safe(x):\n    # subprocess.run(thing, shell=True)\n    return x\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("safe.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            reported.is_empty(),
            "subprocess.run inside a comment must not fire. Got: {:?}",
            titles
        );
    }

    // =================================================================
    // Cohort 2 — audit-shape (line scanner may FP / FN; AST migration must fix)
    // =================================================================

    #[test]
    fn test_skips_subprocess_run_with_static_list_args() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: every element of the argv list is a string literal.
        let source_text =
            "import subprocess\n\ndef list_files():\n    subprocess.run([\"git\", \"status\"])\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("safe.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            reported.is_empty(),
            "subprocess.run with all-literal list args is safe. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_subprocess_run_with_user_in_list_first_position() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: the first list element (argv[0]) is a parameter.
        let source_text = "import subprocess\n\ndef run(user_binary):\n    subprocess.run([user_binary, \"--flag\"])\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("vuln.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "argv[0] user-controlled in subprocess.run list must fire. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_skips_string_literal_mentioning_subprocess() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: `subprocess.run()` appears only as text inside a string.
        let source_text =
            "def doc():\n    msg = \"Use subprocess.run() to call commands\"\n    return msg\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("docs.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            reported.is_empty(),
            "subprocess.run inside a string literal must not fire. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_skips_exec_as_method_name() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: `exec` is the name of a method being defined, not a call.
        let source_text =
            "class Runner:\n    def exec(self, cmd):\n        return cmd\n\nr = Runner()\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("safe.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            reported.is_empty(),
            "exec as a method-name definition must not fire. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_subprocess_with_concatenation() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: literal prefix `+` parameter, run with `shell=True`.
        let source_text = "import subprocess\n\ndef run(user_dir):\n    subprocess.run(\"ls \" + user_dir, shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("vuln.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "subprocess.run with `+`-concatenated tainted arg + shell=True must fire. Got: {:?}",
            titles
        );
    }

    // =================================================================
    // Cohort 3 — audit-pending (resolved by the AST migration)
    // =================================================================

    #[test]
    fn test_detects_python_fstring_in_subprocess() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: f-string interpolating a parameter, run with `shell=True`.
        let source_text = "import subprocess\n\ndef run(user_dir):\n    subprocess.run(f\"ls {user_dir}\", shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("vuln.py", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "f-string with user_dir interpolation must fire. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_js_template_literal_in_exec() {
        use crate::detectors::analysis_context::AnalysisContext;

        // JavaScript: template literal interpolating a function parameter.
        let source_text = "const child_process = require('child_process');\n\nfunction run(userDir) {\n    child_process.exec(`ls ${userDir}`);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis =
            AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", source_text)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

        assert!(
            !reported.is_empty(),
            "template-literal interpolation in exec must fire. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_severity_critical_for_user_input_low_for_static_literal() {
        // One Python file with two `os.system` calls: the first takes a
        // parameter, the second a string literal (`"date"`).
        let content =
            "import os\n\ndef run(user_input):\n    os.system(user_input)\n    os.system(\"date\")\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &store,
            vec![("mixed.py", content)],
        );
        let findings = detector.detect(&ctx).expect("detection should succeed");
        // Tally findings by severity; other severities are counted by
        // neither filter.
        let critical_count = findings
            .iter()
            .filter(|f| f.severity == Severity::Critical)
            .count();
        let low_count = findings
            .iter()
            .filter(|f| f.severity == Severity::Low)
            .count();
        assert!(
            critical_count >= 1,
            "Expected >=1 Critical for tainted os.system. Got: {:?}",
            findings
                .iter()
                .map(|f| (&f.title, &f.severity))
                .collect::<Vec<_>>()
        );
        // Every finding must be either Critical or Low (no other severity
        // may appear).
        // NOTE(review): this does not tie a severity to a specific call —
        // two Critical findings would also pass, so the "not Critical"
        // wording in the message is not actually enforced here.
        assert!(
            findings.len() <= critical_count + low_count,
            "Static-literal os.system should be Low or filtered, not Critical. Got: {:?}",
            findings
                .iter()
                .map(|f| (&f.title, &f.severity))
                .collect::<Vec<_>>()
        );
    }

    /// Go fixture with two handlers: `runOne` passes a form value as the
    /// binary name of `exec.Command`, `runTwo` passes a form value as
    /// the payload of `exec.Command("sh", "-c", ...)`. At least two
    /// findings are required.
    #[test]
    fn test_detects_go_exec_command_with_user_arg() {
        use crate::detectors::analysis_context::AnalysisContext;

        let go_src = "package main\n\nimport (\n\t\"os/exec\"\n\t\"net/http\"\n)\n\nfunc runOne(r *http.Request) {\n\tuserBin := r.FormValue(\"bin\")\n\texec.Command(userBin, \"--flag\")\n}\n\nfunc runTwo(r *http.Request) {\n\tuserInput := r.FormValue(\"cmd\")\n\texec.Command(\"sh\", \"-c\", userInput)\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.go", go_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            reported.len() >= 2,
            "Both exec.Command call shapes must fire. Got: {:?}",
            titles
        );
    }

    // =================================================================
    // Self-audit regression tests (B1–B11)
    // =================================================================

    /// B1 (CRITICAL): an inline `require('child_process').exec(...)` call
    /// fed from `req.body.cmd` must yield at least one finding.
    #[test]
    fn test_b1_require_child_process_exec_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src =
            "function run(req, res) {\n    require('child_process').exec(req.body.cmd);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "require('child_process').exec(...) must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B1 (CRITICAL): the synchronous variant,
    /// `require('child_process').execSync(userInput)`, must also yield
    /// at least one finding.
    #[test]
    // The test name mirrors the JS API spelling (`execSync`).
    #[allow(non_snake_case)]
    fn test_b1_require_child_process_execSync_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src =
            "function run(userInput) {\n    require('child_process').execSync(userInput);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "require('child_process').execSync(...) must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B1 (CRITICAL): dynamic-import receiver,
    /// `(await import('child_process')).exec(userInput)`, must yield at
    /// least one finding.
    #[test]
    fn test_b1_await_import_child_process_exec_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src = "async function run(userInput) {\n    (await import('child_process')).exec(userInput);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "(await import('child_process')).exec(...) must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B2/B3 (HIGH): `os/exec` imported under the alias `e`; the call
    /// `e.Command(b)` with a function parameter must yield a finding.
    #[test]
    fn test_b2_go_aliased_exec_command_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let go_src =
            "package main\n\nimport e \"os/exec\"\n\nfunc handler(b string) {\n\te.Command(b)\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.go", go_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "Aliased exec.Command via `import e \"os/exec\"` must fire. Got: {:?}",
            titles
        );
    }

    /// B2/B3 (HIGH): `os/exec` imported under the alias `x` inside a
    /// grouped import; `x.CommandContext(ctx, b)` must yield a finding.
    #[test]
    fn test_b2_go_aliased_exec_commandcontext_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let go_src = "package main\n\nimport (\n\tx \"os/exec\"\n\t\"context\"\n)\n\nfunc handler(ctx context.Context, b string) {\n\tx.CommandContext(ctx, b)\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.go", go_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "Aliased exec.CommandContext must fire. Got: {:?}",
            titles
        );
    }

    /// B4 (MEDIUM): PHP method calls on an object (`$obj->system`,
    /// `$obj->exec`, `$obj->shell_exec`) are not the global functions of
    /// the same name, so the detector must report nothing.
    #[test]
    fn test_b4_php_object_method_does_not_fire() {
        use crate::detectors::analysis_context::AnalysisContext;

        let php_src = "<?php\nfunction run($obj, $input) {\n    $obj->system($input);\n    $obj->exec($input);\n    $obj->shell_exec($input);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("safe.php", php_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            reported.is_empty(),
            "Object-method `$obj->system($x)` must not fire. Got: {:?}",
            titles
        );
    }

    /// B4 (MEDIUM): Ruby calls with an explicit receiver (`obj.system`,
    /// `obj.exec`) are ordinary method calls rather than Kernel#system,
    /// so the detector must report nothing.
    #[test]
    fn test_b4_ruby_object_method_does_not_fire() {
        use crate::detectors::analysis_context::AnalysisContext;

        let ruby_src = "def run(obj, arg)\n  obj.system(arg)\n  obj.exec(arg)\nend\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("safe.rb", ruby_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            reported.is_empty(),
            "Object-method `obj.system(x)` must not fire. Got: {:?}",
            titles
        );
    }

    /// B5 (MEDIUM): the shell-c form with a parenthesized shell name,
    /// `exec.Command(("sh"), "-c", userInput)`, must still produce a
    /// Critical finding.
    #[test]
    fn test_b5_go_shell_c_with_parenthesized_name_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let go_src = "package main\n\nimport (\n\t\"os/exec\"\n\t\"net/http\"\n)\n\nfunc handler(r *http.Request) {\n\tuserInput := r.FormValue(\"cmd\")\n\texec.Command((\"sh\"), \"-c\", userInput)\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.go", go_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let has_critical = reported.iter().any(|f| f.severity == Severity::Critical);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, &f.severity)).collect();
        assert!(
            has_critical,
            "Parenthesized shell name in shell-c form must produce Critical. Got: {:?}",
            summary
        );
    }

    /// B6 (MEDIUM): `child_process.spawn('cmd', userArgs, { shell: true })`
    /// must surface a finding even though the command name is a literal.
    #[test]
    fn test_b6_js_spawn_with_shell_true_option_boosted() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src = "const child_process = require('child_process');\nfunction run(userArgs) {\n    child_process.spawn('cmd', userArgs, { shell: true });\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        // With `shell: true` the arguments go through a shell, as they
        // would with `exec`, so a literal command name is not enough to
        // make the call safe. The check is "at least one finding", i.e.
        // the result was not dropped as Low.
        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "spawn(..., {{ shell: true }}) must boost severity above Low. Got: {:?}",
            titles
        );
    }

    /// B6 (MEDIUM): `child_process.execFile('cmd', userArgs, { shell: true })`
    /// must surface a finding; same expectation as the `spawn` variant.
    #[test]
    fn test_b6_js_execfile_with_shell_true_option_boosted() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src = "const child_process = require('child_process');\nfunction run(userArgs) {\n    child_process.execFile('cmd', userArgs, { shell: true });\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("handler.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "execFile(..., {{ shell: true }}) must boost severity above Low. Got: {:?}",
            titles
        );
    }

    /// B8 (MEDIUM): the PHP global `popen` called with `$_GET['cmd']` as
    /// its command must yield at least one finding.
    #[test]
    fn test_b8_php_popen_with_user_input_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let php_src = "<?php\nfunction run() {\n    popen($_GET['cmd'], 'r');\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("vuln.php", php_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            !reported.is_empty(),
            "popen($_GET['cmd'], 'r') must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B9 (LOW): a user-defined object named `process` with its own
    /// `exec` method is not `child_process`; calling `process.exec(input)`
    /// must report nothing.
    #[test]
    fn test_b9_process_exec_does_not_fire_as_child_process() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src = "class Runner {}\nconst process = new Runner();\nprocess.exec = function(x){};\nfunction run(input) {\n    process.exec(input);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("safe.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
        assert!(
            reported.is_empty(),
            "`process.exec(x)` is not a real Node API and must not fire. Got: {:?}",
            titles
        );
    }

    // ----- Python from-import alias resolution (closes deferred B10) -----

    /// Bare call after a from-import: `from os import system` followed
    /// by `system(user_input)` on line 4 of the fixture must produce a
    /// finding anchored on that line. Per the original audit note this
    /// shape was previously missed because only `attribute` callees
    /// were inspected; it mirrors `insecure_crypto`'s
    /// `test_python_bare_md5_after_from_import`.
    #[test]
    fn test_python_bare_system_after_from_import() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "from os import system\n\ndef run(user_input):\n    system(user_input)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("h.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let fired_on_call_line = reported.iter().any(|f| f.line_start == Some(4));
        let summary: Vec<_> = reported.iter().map(|f| (f.line_start, &f.title)).collect();
        assert!(
            fired_on_call_line,
            "Should fire on `system(user_input)` after `from os import system`. Got: {:?}",
            summary
        );
    }

    /// Bare call after `from subprocess import run`: the call
    /// `run(user_input, shell=True)` on line 4 must produce a Critical
    /// finding anchored on that line.
    #[test]
    fn test_python_bare_subprocess_run_after_from_import() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src =
            "from subprocess import run\n\ndef go(user_input):\n    run(user_input, shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("h.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let critical_on_call_line = reported
            .iter()
            .any(|f| f.line_start == Some(4) && f.severity == Severity::Critical);
        let summary: Vec<_> = reported
            .iter()
            .map(|f| (f.line_start, f.severity, &f.title))
            .collect();
        assert!(
            critical_on_call_line,
            "Should fire Critical on `run(user_input, shell=True)` after `from subprocess import run`. Got: {:?}",
            summary
        );
    }

    /// Module-alias shape: `import subprocess as sp` followed by
    /// `sp.run(user_input, shell=True)`.
    ///
    /// The callee is an attribute call whose object text is `"sp"`
    /// rather than `"subprocess"`; per the audit note, without
    /// module-alias resolution the
    /// `classify_python_command_callee("sp", "run", ...)` lookup has no
    /// entry under `"sp"` and misses. This is the `import M as N`
    /// counterpart of `test_python_bare_subprocess_run_after_from_import`.
    /// A Critical finding on line 4 is required.
    #[test]
    fn test_python_aliased_module_subprocess_run_detected() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src =
            "import subprocess as sp\n\ndef go(user_input):\n    sp.run(user_input, shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("h.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let critical_on_call_line = reported
            .iter()
            .any(|f| f.line_start == Some(4) && f.severity == Severity::Critical);
        let summary: Vec<_> = reported
            .iter()
            .map(|f| (f.line_start, f.severity, &f.title))
            .collect();
        assert!(
            critical_on_call_line,
            "Should fire Critical on `sp.run(user_input, shell=True)` after `import subprocess as sp`. Got: {:?}",
            summary
        );
    }

    // -------------------------------------------------------------
    // B15: shell metacharacters in static-literal commands.
    // -------------------------------------------------------------

    /// B15: `os.system("ls; rm -rf /")` is a static literal, but it
    /// contains the `;` shell metacharacter, so a Medium finding is
    /// required instead of the literal being skipped as Low.
    #[test]
    fn test_b15_static_literal_with_semicolon_chain_python_os_system() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import os\nos.system(\"ls; rm -rf /\")\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("a.py", py_src)]);
        let reported = scanner.detect(&analysis).expect("detection");

        let has_medium = reported.iter().any(|f| f.severity == Severity::Medium);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        assert!(
            has_medium,
            "B15: static literal with `;` must fire Medium. Got: {:?}",
            summary
        );
    }

    /// B15: `subprocess.run("cat foo | grep bar", shell=True)` is a
    /// static literal containing the `|` pipe metacharacter; a Medium
    /// finding is required.
    #[test]
    fn test_b15_static_literal_with_pipe_python_subprocess_shell_true() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import subprocess\nsubprocess.run(\"cat foo | grep bar\", shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("a.py", py_src)]);
        let reported = scanner.detect(&analysis).expect("detection");

        let has_medium = reported.iter().any(|f| f.severity == Severity::Medium);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        assert!(
            has_medium,
            "B15: static literal with `|` must fire Medium. Got: {:?}",
            summary
        );
    }

    /// B15: `os.system("echo $(date)")` is a static literal containing
    /// `$(...)` command substitution; a Medium finding is required.
    #[test]
    fn test_b15_static_literal_with_dollar_paren_subst_python() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import os\nos.system(\"echo $(date)\")\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("a.py", py_src)]);
        let reported = scanner.detect(&analysis).expect("detection");

        let has_medium = reported.iter().any(|f| f.severity == Severity::Medium);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        assert!(
            has_medium,
            "B15: static literal with `$(` must fire Medium. Got: {:?}",
            summary
        );
    }

    /// B15 control case: `os.system("date")` has no shell metacharacters,
    /// so no finding at Medium or above may be reported. (Per the
    /// original note, the existing Low classification is dropped by the
    /// post-processor.)
    #[test]
    fn test_b15_static_literal_no_metachar_still_low_or_skipped() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import os\nos.system(\"date\")\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("a.py", py_src)]);
        let reported = scanner.detect(&analysis).expect("detection");

        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        let medium_or_above = reported
            .iter()
            .filter(|f| f.severity >= Severity::Medium)
            .count();
        assert_eq!(
            medium_or_above,
            0,
            "Static literal `date` (no metachar) must not fire Medium+. Got: {:?}",
            summary
        );
    }

    // -------------------------------------------------------------
    // §1 (batch 1): MixedListLiteralArgv0 demotion + Python/JS shell-c
    // reclassification. These mirror the Go shell-c handling so the
    // detector's CWE-78 mapping is structurally correct: with shell=
    // False and a literal argv[0], execve passes argv[≥1] as raw
    // strings to the target binary's main(argc, argv), so there is no
    // shell-injection vector. The shell-c sub-shape is reclassified
    // to its own always-Critical API tag.
    // -------------------------------------------------------------

    /// `subprocess.Popen(["xdg-open", url])` where `url` is a function
    /// parameter. Per the §1 notes this used to be High under
    /// `MixedListLiteralArgv0`; with shell=False and a literal argv[0]
    /// it is structurally not CWE-78, so no High/Critical finding may
    /// be reported.
    #[test]
    fn test_python_subprocess_list_literal_argv0_with_url_param_is_low() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src =
            "import subprocess\n\ndef open_url(url):\n    subprocess.Popen([\"xdg-open\", url])\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("u.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        // Low findings are filtered out; before the §1 change a High
        // finding would have surfaced for this call.
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        let high_or_critical = reported
            .iter()
            .filter(|f| f.severity == Severity::High || f.severity == Severity::Critical)
            .count();
        assert_eq!(
            high_or_critical,
            0,
            "Literal argv[0] (`xdg-open`) + variable later arg with shell=False \
             must not produce High/Critical findings. Got: {:?}",
            summary
        );
    }

    /// `subprocess.call(["explorer", f"/select,{url}"])`: fixed argv[0]
    /// with an interpolated later argument and shell=False. Same shape
    /// as the `xdg-open` case, so no High/Critical finding may be
    /// reported.
    #[test]
    fn test_python_subprocess_list_literal_argv0_with_filepath_is_low() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import subprocess\n\ndef show(url):\n    subprocess.call([\"explorer\", f\"/select,{url}\"])\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("p.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        let high_or_critical = reported
            .iter()
            .filter(|f| f.severity == Severity::High || f.severity == Severity::Critical)
            .count();
        assert_eq!(
            high_or_critical,
            0,
            "Literal argv[0] + interpolated later arg with shell=False must not \
             produce High/Critical findings. Got: {:?}",
            summary
        );
    }

    /// `subprocess.run(["sh", "-c", user_input])`, the textbook shell-c
    /// shape. Per the §1 notes it previously fell through to
    /// `MixedListLiteralArgv0` (High) and is now reclassified as
    /// `PySubprocessShellC`; a Critical finding is required.
    #[test]
    fn test_python_subprocess_sh_dash_c_user_input_critical() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import subprocess\n\ndef run(user_input):\n    subprocess.run([\"sh\", \"-c\", user_input])\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("h.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let has_critical = reported.iter().any(|f| f.severity == Severity::Critical);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        assert!(
            has_critical,
            "subprocess.run([\"sh\", \"-c\", user_input]) must fire Critical. Got: {:?}",
            summary
        );
    }

    /// Shell-c reclassification for other shell spellings: both
    /// `subprocess.run(["bash", "-c", user_input])` and
    /// `subprocess.run(["/bin/bash", "-c", user_input])` must produce a
    /// Critical finding. Each spelling is scanned as its own fixture.
    #[test]
    fn test_python_subprocess_bash_dash_c_user_input_critical() {
        use crate::detectors::analysis_context::AnalysisContext;

        let shells = ["bash", "/bin/bash"];
        for variant in shells.iter() {
            let py_src = format!(
                "import subprocess\n\ndef run(user_input):\n    subprocess.run([\"{}\", \"-c\", user_input])\n",
                variant
            );
            let graph = GraphBuilder::new().freeze();
            let scanner = CommandInjectionDetector::new("/mock/repo");
            let analysis =
                AnalysisContext::test_with_mock_files(&graph, vec![("h.py", py_src.as_str())]);
            let reported = scanner
                .detect(&analysis)
                .expect("detection should succeed");

            let has_critical = reported.iter().any(|f| f.severity == Severity::Critical);
            let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
            assert!(
                has_critical,
                "subprocess.run([\"{}\", \"-c\", user_input]) must fire Critical. Got: {:?}",
                variant,
                summary
            );
        }
    }

    /// JS counterpart of the shell-c reclassification:
    /// `child_process.spawn("sh", ["-c", userInput])`. Per the §1 notes
    /// it previously fell through to `MixedListLiteralArgv0` (High) and
    /// is now tagged `JsSpawnShellC`; a Critical finding is required.
    #[test]
    fn test_js_spawn_sh_dash_c_user_critical() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src = "const child_process = require('child_process');\nfunction run(userInput) {\n    child_process.spawn(\"sh\", [\"-c\", userInput]);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("h.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let has_critical = reported.iter().any(|f| f.severity == Severity::Critical);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        assert!(
            has_critical,
            "child_process.spawn(\"sh\", [\"-c\", userInput]) must fire Critical. Got: {:?}",
            summary
        );
    }

    /// `child_process.spawn("xdg-open", [url])`: literal argv[0], a
    /// variable later argument, and no shell. Per the §1 notes this was
    /// High under `MixedListLiteralArgv0` and is now demoted to Low
    /// (filtered); no High/Critical finding may be reported.
    #[test]
    fn test_js_spawn_literal_argv0_with_user_arg_low() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src = "const child_process = require('child_process');\nfunction openUrl(url) {\n    child_process.spawn(\"xdg-open\", [url]);\n}\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("o.js", js_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        let high_or_critical = reported
            .iter()
            .filter(|f| f.severity == Severity::High || f.severity == Severity::Critical)
            .count();
        assert_eq!(
            high_or_critical,
            0,
            "spawn(\"xdg-open\", [url]) must not produce High/Critical. Got: {:?}",
            summary
        );
    }

    /// `subprocess.run([cmd, "--flag"])`: argv[0] itself is a variable.
    /// Per the §1 notes this is `MixedListVarArgv0`, which the §1 change
    /// does **not** touch; a Critical finding is still required.
    #[test]
    fn test_python_subprocess_var_argv0_unchanged() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import subprocess\n\ndef go(cmd):\n    subprocess.run([cmd, \"--flag\"])\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("v.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let has_critical = reported.iter().any(|f| f.severity == Severity::Critical);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        assert!(
            has_critical,
            "subprocess.run([cmd, \"--flag\"]) (variable argv[0]) must stay Critical. Got: {:?}",
            summary
        );
    }

    /// `subprocess.run(["sh", "echo", x], shell=True)`: the shell=True
    /// path. Per the §1 notes this goes through the always-shell branch
    /// of `severity_for` and is unchanged by the §1 fix; the variable
    /// element `x` makes the list `MixedListLiteralArgv0`, which under
    /// shell APIs still maps to High. The test accepts High or Critical.
    #[test]
    fn test_python_subprocess_shell_true_unchanged() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import subprocess\n\ndef go(x):\n    subprocess.run([\"sh\", \"echo\", x], shell=True)\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("s.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        let has_high_or_critical = reported
            .iter()
            .any(|f| f.severity == Severity::High || f.severity == Severity::Critical);
        let summary: Vec<_> = reported.iter().map(|f| (&f.title, f.severity)).collect();
        assert!(
            has_high_or_critical,
            "shell=True with mixed list must still surface High/Critical. Got: {:?}",
            summary
        );
    }

    // ─────────────────────────────────────────────────────────────────
    // Phase 2d dual-branch integration tests.
    //
    // Mirrors the path_traversal 2b test layout:
    //
    // 1. `flag_off_emits_single_branch_unchanged` — opt-in promise.
    // 2. `flag_on_*_emits_dual_branch` — smoke.
    // 3. 2x2 matrix (predicted × actual).
    // 4. Pass B / Pass C / drop-Low skip invariants.
    //
    // Helper `run_dual_branch` flips the per-detector flag for
    // `command-injection` on, builds an AnalysisContext with mock files,
    // and runs `detect()`.
    // ─────────────────────────────────────────────────────────────────

    /// Runs the command-injection detector over a single mock file with
    /// dual-branch mode switched on.
    ///
    /// Builds a `DualBranchConfig` with `enabled: true` and the
    /// `"command-injection"` per-detector entry set to `true`, attaches
    /// it to a mock-file `AnalysisContext` holding `(file, content)`,
    /// and returns whatever `detect()` reports. Panics if detection
    /// returns an error.
    fn run_dual_branch(file: &str, content: &str) -> Vec<Finding> {
        use crate::config::DualBranchConfig;
        use crate::detectors::analysis_context::AnalysisContext;
        use std::collections::HashMap;

        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");

        // Single-entry map: opt this one detector into dual-branch mode.
        let per_detector: HashMap<_, _> =
            std::iter::once(("command-injection".to_string(), true)).collect();
        let dual_branch = DualBranchConfig {
            enabled: true,
            detectors: per_detector,
        };

        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![(file, content)])
            .with_dual_branch(dual_branch);
        scanner
            .detect(&analysis)
            .expect("detection should succeed")
    }

    /// Opt-in promise: with the dual-branch flag left at its default
    /// (off), a Python command-injection site still fires, but every
    /// finding has no `alternative_branch` and carries only weight-0
    /// prediction reasons.
    #[test]
    fn flag_off_emits_single_branch_unchanged() {
        use crate::detectors::analysis_context::AnalysisContext;

        let py_src = "import os\n\
             def handler(request):\n\
             \x20   os.system(\"ls \" + request.GET[\"q\"])\n";
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = AnalysisContext::test_with_mock_files(&graph, vec![("vuln.py", py_src)]);
        let reported = scanner
            .detect(&analysis)
            .expect("detection should succeed");

        assert!(!reported.is_empty(), "must still fire single-branch");
        for finding in reported.iter() {
            assert!(
                finding.alternative_branch.is_none(),
                "no alternative_branch when flag off: {:?}",
                finding.title
            );

            // Only weight-0 reasons are tolerated in single-branch mode.
            let all_weightless = finding.prediction_reasons.iter().all(|r| r.weight == 0.0);
            let reason_summary: Vec<_> = finding
                .prediction_reasons
                .iter()
                .map(|r| (&r.kind, r.weight))
                .collect();
            assert!(
                all_weightless,
                "no predictor-emitted (weight ≠ 0) reasons when flag off; \
                 only weight-0 graph-enrichment reasons are allowed. reasons: {:?}",
                reason_summary
            );
        }
    }

    #[test]
    fn flag_on_python_command_injection_emits_dual_branch() {
        // Smoke test for the flag-on path: a Python `os.system` call fed
        // from `request.GET` must yield a finding whose
        // `alternative_branch` is populated and that carries at least
        // one prediction reason.
        let source = "import os\n\
                      def handler(request):\n\
                      \x20   os.system(\"ls \" + request.GET[\"q\"])\n";
        let findings = run_dual_branch("vuln.py", source);

        // Only the first finding is inspected.
        let first = match findings.first() {
            Some(finding) => finding,
            None => panic!("must fire dual-branch"),
        };
        assert!(
            first.alternative_branch.is_some(),
            "alternative_branch must be populated when flag on. title={:?}",
            first.title
        );
        assert!(
            !first.prediction_reasons.is_empty(),
            "at least one prediction reason"
        );
    }

    // ── 2x2 matrix ──

    #[test]
    fn matrix_predicted_realbug_request_source() {
        // Canonical CWE-78: os.system with request-sourced arg inside a
        // handler. Strong RealBug signals: request-source (-0.50) +
        // handler-scope (-0.30). Severity → from the 2D table.
        //
        // Only the first finding is inspected. Every assertion carries
        // a message so a failure points at the broken invariant rather
        // than at a bare `unwrap()` / `assert!`.
        let findings = run_dual_branch(
            "vuln.py",
            "import os\n\
             def handler(request):\n\
             \x20   os.system(\"ls \" + request.GET[\"q\"])\n",
        );
        assert!(
            !findings.is_empty(),
            "request-sourced os.system must produce a finding"
        );
        let f = &findings[0];
        assert!(
            matches!(f.severity, Severity::High | Severity::Critical),
            "predicted RealBug uses 2D severity table — interpolated arg \
             to os.system is High/Critical, got {:?}",
            f.severity
        );
        assert!(
            f.title.to_lowercase().contains("command injection"),
            "RealBug title; got {:?}",
            f.title
        );
        // The non-predicted branch must be the Benign reading at Info.
        let alt = f
            .alternative_branch
            .as_ref()
            .expect("alternative_branch populated");
        assert_eq!(
            alt.label,
            crate::dual_branch::BranchLabel::Benign,
            "alternative of a predicted RealBug must be the Benign branch"
        );
        assert_eq!(
            alt.severity,
            Severity::Info,
            "Benign alternative must carry Info severity"
        );
    }

    #[test]
    fn matrix_predicted_benign_all_literals_list() {
        // List form with all literals → +0.50 (W_ALL_LITERALS) +0.30
        // (W_ARGV0_LITERAL) = strong Benign. Severity → Info.
        let findings = run_dual_branch(
            "internal.py",
            "import subprocess\n\
             def cleanup():\n\
             \x20   subprocess.run([\"rm\", \"-rf\", \"/tmp/cache\"])\n",
        );
        assert!(!findings.is_empty(), "must surface (drop-Low disabled)");
        let f = findings
            .iter()
            .find(|f| f.is_dual_branch())
            .expect("must have at least one dual-branch finding");
        assert_eq!(
            f.severity,
            Severity::Info,
            "predicted Benign → Info, got {:?}",
            f.severity
        );
        // The alternative must be the RealBug reading, with a severity
        // other than Info. `expect` names the invariant so a failure
        // here is self-explanatory.
        let alt = f
            .alternative_branch
            .as_ref()
            .expect("alternative_branch populated");
        assert_eq!(
            alt.label,
            crate::dual_branch::BranchLabel::RealBug,
            "alternative of a predicted Benign must be the RealBug branch"
        );
        assert!(
            !matches!(alt.severity, Severity::Info),
            "alternative carries non-Info severity (the original 2D \
             table interpretation); got {:?}",
            alt.severity
        );
    }

    #[test]
    fn matrix_predicted_benign_literal_argv0_param_tail() {
        // Fixed argv[0] literal + parameter in argv[1+] → Benign-lean
        // (W_ARGV0_LITERAL +0.30 vs no shell=True penalty). The
        // argument-injection (CWE-88) risk is real but not
        // shell-injection; predictor honestly leans Benign.
        let findings = run_dual_branch(
            "tool.py",
            "import subprocess\n\
             def lookup(name):\n\
             \x20   subprocess.run([\"grep\", name, \"/etc/passwd\"])\n",
        );
        assert!(
            !findings.is_empty(),
            "literal-argv0 list call must surface under flag-on"
        );
        let f = findings
            .iter()
            .find(|f| f.is_dual_branch())
            .expect("dual-branch finding expected");
        assert_eq!(
            f.severity,
            Severity::Info,
            "literal argv0 + var tail leans Benign → Info; got {:?}",
            f.severity
        );
        // The RealBug reading must remain available as the alternative.
        let alt = f
            .alternative_branch
            .as_ref()
            .expect("alternative_branch populated");
        assert_eq!(
            alt.label,
            crate::dual_branch::BranchLabel::RealBug,
            "alternative of a predicted Benign must be the RealBug branch"
        );
    }

    // ── Pass B / Pass C / drop-Low skip invariants ──

    #[test]
    fn dual_branch_finding_skips_pass_c_handler_boost() {
        // The enclosing function is literally named "handler". With the
        // flag off, Pass C would bump High/Medium → Critical for such a
        // scope. Dual-branch findings opt out of that pass, so the
        // predicted severity must survive untouched — no promotion for
        // a predicted-Benign Info, and none for a predicted-RealBug
        // High either (handler-scope is already priced into the
        // predictor weights).
        //
        // Fixture: `subprocess.run([literal, literal])` inside
        // `def handler()`. Expected verdict is Benign at Info, and Pass
        // C must leave it there.
        let source = "import subprocess\n\
                      def handler():\n\
                      \x20   subprocess.run([\"ls\", \"-la\"])\n";
        let findings = run_dual_branch("h.py", source);

        let dual = findings
            .iter()
            .find(|candidate| candidate.is_dual_branch())
            .expect("dual-branch finding expected");
        assert_eq!(
            dual.severity,
            Severity::Info,
            "Pass C skipped dual-branch findings — predicted Info \
             stays Info even in handler-scope. Got {:?}",
            dual.severity
        );
    }

    #[test]
    fn dual_branch_drop_low_disabled_when_flag_on() {
        // `os.system` with a fully static literal command is the case
        // where the 2D table yields Low (B6/B15 calibration). With the
        // flag off, the drop-Low retain removes such findings; with the
        // flag on, that drop is disabled so the (Info or Low) finding
        // stays visible for audit.
        //
        // The predictor itself rates a static-literal os.system as
        // Benign (no negative signals, literal form is positive), so
        // the finding lands at Info. What is pinned here is only that
        // it surfaces at all instead of being silently discarded.
        let source = "import os\n\
                      def cleanup():\n\
                      \x20   os.system(\"rm -rf /tmp/cache\")\n";
        let findings = run_dual_branch("static.py", source);

        let surfaced = findings.iter().any(|finding| finding.is_dual_branch());
        assert!(
            surfaced,
            "drop-Low must be disabled when flag-on — static-literal \
             call should still surface as a dual-branch finding. \
             Findings: {:?}",
            findings
                .iter()
                .map(|f| (&f.title, f.severity, f.is_dual_branch()))
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn collapsing_annotation_command_static_forces_benign() {
        // A trailing `# repotoire: command-static[...]` annotation on
        // the call line collapses the verdict to Benign-Info no matter
        // what the other signals say. Smoke test for D2 (parser wiring)
        // and for the resolution_signals plumbing.
        let source = "import os\n\
                      def handler(request):\n\
                      \x20   os.system(\"ls \" + request.GET[\"q\"])  # repotoire: command-static[internal-tool]\n";
        let findings = run_dual_branch("annot.py", source);

        let dual = findings
            .iter()
            .find(|candidate| candidate.is_dual_branch())
            .expect("dual-branch finding expected");
        assert_eq!(dual.severity, Severity::Info, "annotation collapses to Info");

        let has_resolution_signal = !dual.resolution_signals.is_empty();
        assert!(
            has_resolution_signal,
            "annotation must surface as a resolution_signal"
        );
    }

    #[test]
    fn collapsing_annotation_command_user_controlled_forces_realbug() {
        // Mirror image of the `command-static` case: a
        // `command-user-controlled[...]` annotation forces RealBug even
        // though the call is made entirely of literals. The severity is
        // then taken from the 2D table, so it must not be Info.
        let source = "import subprocess\n\
                      def cleanup():\n\
                      \x20   subprocess.run([\"rm\", \"/tmp/cache\"])  # repotoire: command-user-controlled[admin-route]\n";
        let findings = run_dual_branch("annot.py", source);

        let dual = findings
            .iter()
            .find(|candidate| candidate.is_dual_branch())
            .expect("dual-branch finding expected");
        assert!(
            dual.severity != Severity::Info,
            "annotation forces RealBug, severity from 2D table, not \
             Info. Got {:?}",
            dual.severity
        );

        let has_resolution_signal = !dual.resolution_signals.is_empty();
        assert!(
            has_resolution_signal,
            "annotation must surface as a resolution_signal"
        );
    }

    // ─────────────────────────────────────────────────────────────────
    // Real-world signature pins.
    //
    // Mirrors Phase 2b's `click_utils_489_*` tests: take a canonical
    // call shape from a popular OSS Python project, reproduce it as a
    // minimal fixture (simplified where the test says so), and assert
    // on the predictor's output. These tests serve three purposes:
    //
    // 1. Document what the predictor actually does on real code,
    //    independent of the synthetic 2x2 matrix.
    // 2. Catch v0 limitations (e.g. local-variable bindings not
    //    classified) as documented expected behavior rather than
    //    silent mis-prediction.
    // 3. Pin signatures so future predictor changes (e.g. v1 with
    //    intra-function data flow) trigger test failures that force
    //    a decisions-doc update in the same commit.
    // ─────────────────────────────────────────────────────────────────

    #[test]
    fn real_gitpython_execute_signature() {
        // Modelled on GitPython's `git.cmd.Git.execute()`, which builds
        // a command list and dispatches it through subprocess.Popen
        // with `shell=False`. The fixture is a reduction that fits the
        // predictor's intra-call evidence model:
        //
        //   def execute(self, command):
        //       return subprocess.Popen(
        //           ["git"] + command,
        //           stdout=subprocess.PIPE,
        //       )
        //
        // `["git"] + command` reaches the predictor as a BinOp
        // expression rather than a syntactic list literal. The v0
        // list-form classifier only triggers on a direct `List(...)`
        // node, so `argv_list_all_literals` and `argv0_origin` both
        // stay unpopulated and the call falls back to first-arg
        // analysis, where the BinOp classifies as `Unknown`. Net score
        // = 0.0 → tiebreak → RealBug.
        //
        // That is a v0 limitation (list concatenation via BinOp is
        // unclassified), pinned here on purpose. Once v1 adds
        // constant-folding for `[literal] + ident`, this test is
        // expected to tighten and assert that W_ARGV0_IS_LITERAL fires.
        let source = "import subprocess\n\
                      def execute(self, command):\n\
                      \x20   return subprocess.Popen([\"git\"] + command, stdout=subprocess.PIPE)\n";
        let findings = run_dual_branch("gitpython_execute.py", source);

        let dual = findings
            .iter()
            .find(|candidate| candidate.is_dual_branch())
            .expect("dual-branch finding expected");

        // Structural invariant only: the verdict depends on the
        // tiebreak, so just the dual-branch shape (alternative present)
        // is pinned. Reasons are intentionally left unasserted — the v0
        // reason-set is described above and v1 is expected to add to it.
        assert!(dual.alternative_branch.is_some());
    }

    #[test]
    fn real_flask_handler_shell_true() {
        // Modelled on the canonical Flask request-handler shape:
        //
        //   @app.route("/run")
        //   def run_command():
        //       cmd = request.args.get("cmd")
        //       subprocess.run(cmd, shell=True)
        //
        // The fixture below drops the decorator and passes `request` as
        // a parameter instead; the call site itself is unchanged.
        //
        // This is a string-form call, not list-form. `cmd` is a local
        // bound from a method call on `request`, not a function
        // parameter, so the local-parameter heuristic does not apply.
        // v0 limitation: a bare identifier as first arg of a
        // string-form call classifies as `Unknown` when it is not in
        // the param list, so FirstArgRequestSource does not fire on
        // `cmd`.
        //
        // Still, `shell=True` (-0.40) + handler-scope (-0.30, function
        // name contains "run") = -0.70, which is decisively RealBug.
        //
        // Pins the "shell=True alone is enough to flip the verdict"
        // invariant, which the calibrated 2D severity table enforces
        // as well.
        use crate::dual_branch::PredictionReasonKind;

        let source = "import subprocess\n\
                      def run_command(request):\n\
                      \x20   cmd = request.args.get(\"cmd\")\n\
                      \x20   subprocess.run(cmd, shell=True)\n";
        let findings = run_dual_branch("flask_handler.py", source);

        let dual = findings
            .iter()
            .find(|candidate| candidate.is_dual_branch())
            .expect("dual-branch finding expected");

        let severity_is_realbug = [Severity::High, Severity::Critical].contains(&dual.severity);
        assert!(
            severity_is_realbug,
            "shell=True + handler-named function → RealBug, severity \
             from 2D table (High/Critical). Got {:?}. Reasons: {:?}",
            dual.severity,
            dual.prediction_reasons
                .iter()
                .map(|r| (&r.kind, r.weight))
                .collect::<Vec<_>>()
        );

        // The `shell=True` keyword must appear among the reasons.
        let shell_true_fired = dual
            .prediction_reasons
            .iter()
            .any(|reason| match &reason.kind {
                PredictionReasonKind::KeywordArgument { name, value } => {
                    name == "shell" && value == "True"
                }
                _ => false,
            });
        assert!(
            shell_true_fired,
            "KeywordArgument(shell=True) signal must fire on shell=True. Reasons: {:?}",
            dual.prediction_reasons
                .iter()
                .map(|r| &r.kind)
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn real_pip_invoke_via_sys_executable() {
        // pip, setuptools and most other Python tooling re-enter the
        // interpreter by spawning `sys.executable`:
        //
        //   def install_package(name):
        //       subprocess.run(
        //           [sys.executable, "-m", "pip", "install", name],
        //           check=True,
        //       )
        //
        // argv[0] is `sys.executable`, an attribute access rather than
        // a literal. The list is not all-literal (`name` is a
        // parameter), there is no shell=True and no request source.
        //
        // The v0 classifier files argv[0] under `Argv0Origin::Other`
        // (neither a literal nor a bare identifier from the param
        // list), so the +0.30 W_ARGV0_LITERAL does not fire; the
        // all-literals signal does not fire either. With no signals the
        // net score is 0.0 and the tiebreak decides.
        //
        // What gets pinned is the v0 posture: under flag-on (no
        // drop-Low) the pattern surfaces, receives a tiebreak verdict,
        // and offers the alternative for review. Without data flow the
        // predictor cannot tell whether the caller is
        // attacker-influenced.
        //
        // Once v1 classifies `sys.executable`-style attribute access
        // (on `sys` / `__builtins__` / etc.) as ConfigSource, this
        // assertion is expected to tighten.
        use crate::dual_branch::BranchLabel;

        let source = "import subprocess, sys\n\
                      def install_package(name):\n\
                      \x20   subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", name], check=True)\n";
        let findings = run_dual_branch("pip_invoke.py", source);

        let dual = findings
            .iter()
            .find(|candidate| candidate.is_dual_branch())
            .expect("dual-branch finding expected");

        // Structural invariants only — the verdict is tiebreak-dependent.
        let alt = dual
            .alternative_branch
            .as_ref()
            .expect("alternative_branch populated");

        // The specific label is deliberately not pinned: the tiebreak
        // direction is a predictor implementation detail. The invariant
        // is that the alternative is present and well-formed.
        // NOTE(review): if `BranchLabel` has only these two variants,
        // this check is vacuous — confirm against the enum definition.
        let label_is_set = alt.label == BranchLabel::RealBug || alt.label == BranchLabel::Benign;
        assert!(label_is_set, "alternative branch label must be set");

        let missing_text = alt.title.is_empty() || alt.description.is_empty();
        assert!(
            !missing_text,
            "alternative branch must carry a title and description"
        );
    }
}