repotoire 0.7.0

//! Command-injection detector (CWE-78).
//!
//! Flags execution of OS commands where the command string or arguments
//! are user-controlled. Severity is decided per-call from the AST shape
//! of the argument(s), not from line-text reverse engineering.
//!
//! # Architecture
//!
//! Two scan paths, picked by file language (mirrors
//! `eval_detector.rs`, `cleartext_credentials.rs`, `secrets.rs`):
//!
//! 1. **AST path** (Python, JS, TS, JSX, TSX, Go): walks the tree-sitter
//!    parse tree looking for **call expressions** whose callee matches a
//!    known command-execution API. All other languages either take the
//!    line path (Ruby, PHP, Java, sh) or are out of scope.
//!
//!      - **Python**: `os.system`, `os.popen`/`popen2`/`popen3`/`popen4`,
//!        `subprocess.run`/`call`/`Popen`/`check_output`/`check_call`,
//!        `subprocess.getoutput`/`getstatusoutput` (implicit shell=True),
//!        `commands.getoutput`/`getstatusoutput` (Python-2 legacy),
//!        `pty.spawn`. For `subprocess.*`, the `shell=True` keyword (or
//!        the 9th positional arg of `Popen`) bumps severity. Method
//!        definitions (`def system(self): ...`) and method calls on
//!        non-matching receivers are AST-distinguished from the real
//!        callee shape and never fire.
//!
//!      - **JavaScript / TypeScript**: `child_process.exec`/`execSync`,
//!        `child_process.execFile`/`execFileSync`,
//!        `child_process.spawn`/`spawnSync`, `child_process.fork`,
//!        `shelljs.exec`, plus bare `exec` / `execSync` (destructured
//!        from `child_process`). Receiver chain walks support
//!        `this.cp.exec(...)`, `require('child_process').exec(...)`, and
//!        `(await import('child_process')).exec(...)`.
//!
//!      - **Go**: `exec.Command(name, args...)`,
//!        `exec.CommandContext(ctx, name, args...)`,
//!        `syscall.Exec`/`syscall.StartProcess`. The canonical
//!        shell-injection form `exec.Command("sh", "-c", USER_VAR)` is
//!        recognized by inspecting arg[0] (shell name) + arg[1] (`-c`
//!        / `/c`) + arg[2] (variable) and bumped to Critical.
//!
//!    Severity is decided by classifying the relevant argument(s) into
//!    a `CommandArgKind`:
//!
//!    | API                              | StaticLiteral | StaticList | Interpolated | UserVariable | MixedList (var argv[0]) | MixedList (literal argv[0]) | Unknown |
//!    | -------------------------------- | ------------- | ---------- | ------------ | ------------ | ----------------------- | --------------------------- | ------- |
//!    | `os.system` / `os.popen`         | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `subprocess.* shell=True`        | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `subprocess.* shell=False`       | Low           | Low        | High         | High         | Critical                | High                        | High    |
//!    | `subprocess.getoutput` (shell)   | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `child_process.exec`             | Low           | n/a        | Critical     | Critical     | n/a                     | n/a                         | High    |
//!    | `child_process.execFile/spawn`   | Low           | Low        | High         | High         | Critical                | High                        | High    |
//!    | `exec.Command "sh" "-c" VAR`     | n/a           | n/a        | n/a          | Critical     | n/a                     | n/a                         | n/a     |
//!    | `exec.Command VAR ...`           | Low           | Low        | High         | High         | Critical                | High                        | High    |
//!
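//!    One refinement is not shown in the table: for shell-mode APIs, a
//!    `StaticLiteral` that contains shell metacharacters (`;`, `&&`,
//!    `$(`, ...) is scored `Medium` instead of `Low` (audit B15).
//!
//!    `Low` findings are filtered out from the final result by default
//!    (matches the legacy detector behavior; same filter as
//!    `eval_detector::detect()`). The taint pass at the end of
//!    `detect()` may upgrade an existing finding to Critical when an
//!    unsanitized user-input dataflow is confirmed.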
//!
//!    A route-handler severity boost (function name contains `handler`/
//!    `route`/`endpoint`/`view`/`controller`/`middleware`/`request`/
//!    `response`, or matches `^(get|post|put|delete|patch|head|options)
//!    [A-Z]`) bumps `High`/`Medium` findings to `Critical`. This mirrors
//!    `eval_detector`'s B8 fix.
//!
//! 2. **Line path** (Ruby, PHP, Java, sh, ...): for languages without a
//!    tree-sitter grammar in our dispatch list, a small line-based
//!    regex scanner matches canonical Ruby/PHP/Java forms. This is the
//!    same concession `eval_detector::scan_file_line` makes. We accept
//!    a higher false-positive rate here as a known trade-off.
//!
//!      - **Ruby**: `system(...)`, `exec(...)`, `` `cmd #{var}` `` (backticks
//!        with interpolation). Backtick strings are detected by a
//!        line-text inspection for `` ` `` + `#{`.
//!      - **PHP**: `system($_GET[...])`, `shell_exec`, `passthru`,
//!        `proc_open`, plus `exec(...)` (the standalone PHP function).
//!      - **Java**: `Runtime.getRuntime().exec(...)`, `new ProcessBuilder(...)`.
//!
//! This is the structural counterpart of:
//!   - the cleartext-credentials AST migration (commit `4c656b2f`),
//!   - the eval-detector AST migration (commit `ac8400c6`, audit
//!     follow-up `474e6cb5`).
//!
//! The previous detector ran a line-based regex over raw text and then
//! needed bespoke filters (`def exec`, `.exec(`, `# os.system(`,
//! `RegExp.exec`, `String.prototype.exec`, ...) to suppress 5+
//! "exec/system substring is not a call" false-positive shapes. With
//! the AST, "is this token a call expression with `os.system` as its
//! callee" is a free piece of information and that filter chain
//! disappears.

use crate::detectors::ast_fingerprint::parse_root_ext;
use crate::detectors::ast_walk::AstWalkCtx;
use crate::detectors::base::Detector;
use crate::detectors::fast_search::{
    contains_any, find_in, FIND_CHILD_PROCESS, FIND_COMMANDS_GETOUTPUT,
    FIND_COMMANDS_GETSTATUSOUTPUT, FIND_EXEC_ASYNC, FIND_EXEC_COMMAND, FIND_EXEC_PAREN,
    FIND_EXEC_SYNC, FIND_GETOUTPUT_PAREN, FIND_OS_EXEC_IMPORT, FIND_OS_POPEN, FIND_OS_SYSTEM,
    FIND_PASSTHRU_PAREN, FIND_POPEN_PAREN, FIND_PROCESS_BUILDER, FIND_PROC_OPEN, FIND_PTY_SPAWN,
    FIND_RUNTIME_GETRUNTIME, FIND_SHELLJS, FIND_SHELL_EXEC, FIND_SHELL_TRUE, FIND_SHELL_TRUE_JS,
    FIND_SPAWN_SYNC, FIND_SUBPROCESS, FIND_SYSCALL_DOT, FIND_SYSTEM_PAREN,
};
use crate::detectors::security::ast_helpers::{
    collect_named_args, node_text, python_kwarg_truthy,
    receiver_chain_label as receiver_chain_label_shared, receiver_chain_label_go, unwrap_callee,
};
use crate::detectors::security::scan_inputs::{ScanAstInputs, ScanInputs};
use crate::detectors::taint::{TaintAnalysisResult, TaintAnalyzer, TaintCategory};
use crate::graph::GraphQueryExt;
use crate::models::{Finding, Severity};
use crate::parsers::lightweight::Language;
use anyhow::Result;
use regex::Regex;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;

// ---------------------------------------------------------------------------
// Single source of truth for supported file extensions.
// ---------------------------------------------------------------------------

/// Every file extension this detector scans, AST-path extensions first
/// and line-path extensions after them.
///
/// Kept in step with the `SUPPORTED_EXTS` lists in
/// `cleartext_credentials.rs` and `eval_detector.rs` so AST/line
/// dispatch stays aligned across detectors.
///
/// Extensions that also appear in `AST_EXTS` are handled by
/// `scan_file_ast`; the remaining ones fall through to `scan_file_line`.
const SUPPORTED_EXTS: &[&str] = &[
    // AST path
    "py", "js", "ts", "jsx", "tsx", "go",
    // Line path
    "rb", "php", "java", "sh",
];

/// AST-eligible extensions. A file with one of these extensions flows
/// through `scan_file_ast`; everything else in `SUPPORTED_EXTS` flows
/// through `scan_file_line`.
///
/// These are the same six entries that open `SUPPORTED_EXTS`; the two
/// lists are maintained by hand, so update both together.
const AST_EXTS: &[&str] = &["py", "js", "ts", "jsx", "tsx", "go"];

// ---------------------------------------------------------------------------
// Argument-shape classification
// ---------------------------------------------------------------------------

/// What kind of value is being passed as a command argument.
///
/// The AST-native equivalent of the legacy line-based filter chain
/// (`FIND_REQ_BODY` / `FIND_PARAMS_BRACKET` / `FIND_DOLLAR_BRACE` / ...).
/// Computed from the AST, this is both more precise (no string-tracking
/// gymnastics to know "is this a literal") and more general (handles
/// template literals, f-strings, ternaries, await, TS casts, ...).
///
/// `CommandApi::severity_for` maps each variant, together with the
/// matched API, to a `Severity`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CommandArgKind {
    /// String literal with no interpolation — `os.system("date")`.
    StaticLiteral,
    /// List/array all of whose elements are static literals —
    /// `subprocess.run(["git", "status"])`.
    StaticList,
    /// String with variable interpolation — Python f-string with
    /// interpolation, JS template literal with substitution, or string
    /// concatenation that includes a non-literal operand.
    Interpolated,
    /// Identifier / attribute / subscript / call result —
    /// `os.system(user_input)`, `exec(req.body.cmd)`.
    UserVariable,
    /// List/array with at least one variable element AND a variable in
    /// position 0 — `subprocess.run([user_bin, "--flag"])`. The
    /// attacker chooses which binary runs.
    MixedListVarArgv0,
    /// List/array with at least one variable element but argv[0] is a
    /// static literal — `subprocess.run(["git", user_arg])`. Less
    /// dangerous than `MixedListVarArgv0` because the binary is fixed.
    MixedListLiteralArgv0,
    /// Function expression / arrow / lambda. Almost never seen in
    /// command APIs but handled for completeness; scored `Low`.
    FunctionLike,
    /// Anything we don't classify. Defaults to `High`.
    Unknown,
}

/// Which command-exec API was matched at this site.
///
/// The variant selects the label shown in findings (`callee_label`),
/// whether the argument is treated as a shell command line
/// (`is_shell_api`), and the severity table row (`severity_for`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CommandApi {
    /// `os.system` (always shell-exec).
    PyOsSystem,
    /// `os.popen` (always shell-exec).
    PyOsPopen,
    /// `subprocess.run`/`call`/`Popen`/`check_output`/`check_call` with
    /// `shell=False` (or shell unspecified).
    PySubprocessNoShell,
    /// `subprocess.*` with `shell=True`.
    PySubprocessShell,
    /// `subprocess.getoutput` / `getstatusoutput` — implicit shell=True.
    PySubprocessGetOutput,
    /// `pty.spawn(cmd)`.
    PyPtySpawn,
    /// `commands.getoutput` / `getstatusoutput` (Python-2 legacy).
    PyCommandsGetOutput,
    /// JS `child_process.exec` / `execSync` — always shell=True.
    JsChildProcessExec,
    /// JS `child_process.execFile` / `execFileSync` — argv[0] is the
    /// binary path; no shell.
    JsChildProcessExecFile,
    /// JS `child_process.spawn` / `spawnSync` — argv[0] is the binary
    /// path; no shell.
    JsChildProcessSpawn,
    /// JS `child_process.fork(modulePath, ...)`.
    JsChildProcessFork,
    /// `shelljs.exec(cmd)`.
    JsShellJsExec,
    /// Go `exec.Command(name, args...)` with `name == "sh"` etc and
    /// `args[0] == "-c"`/`"/c"` and a variable in args[1] — the
    /// canonical shell-injection form. Always scored `Critical`.
    GoExecCommandShellC,
    /// Go `exec.Command(name, args...)` general form.
    GoExecCommand,
    /// Go `syscall.Exec`/`syscall.StartProcess`.
    GoSyscallExec,
}

impl CommandApi {
    fn callee_label(self) -> &'static str {
        match self {
            CommandApi::PyOsSystem => "os.system",
            CommandApi::PyOsPopen => "os.popen",
            CommandApi::PySubprocessNoShell => "subprocess",
            CommandApi::PySubprocessShell => "subprocess (shell=True)",
            CommandApi::PySubprocessGetOutput => "subprocess.getoutput",
            CommandApi::PyPtySpawn => "pty.spawn",
            CommandApi::PyCommandsGetOutput => "commands.getoutput",
            CommandApi::JsChildProcessExec => "child_process.exec",
            CommandApi::JsChildProcessExecFile => "child_process.execFile",
            CommandApi::JsChildProcessSpawn => "child_process.spawn",
            CommandApi::JsChildProcessFork => "child_process.fork",
            CommandApi::JsShellJsExec => "shelljs.exec",
            CommandApi::GoExecCommandShellC => "exec.Command(\"sh\", \"-c\", ...)",
            CommandApi::GoExecCommand => "exec.Command",
            CommandApi::GoSyscallExec => "syscall.Exec",
        }
    }

    /// Is this API "always shell" (i.e. argv[0] is interpreted by /bin/sh)?
    fn is_shell_api(self) -> bool {
        matches!(
            self,
            CommandApi::PyOsSystem
                | CommandApi::PyOsPopen
                | CommandApi::PySubprocessShell
                | CommandApi::PySubprocessGetOutput
                | CommandApi::PyCommandsGetOutput
                | CommandApi::JsChildProcessExec
                | CommandApi::JsShellJsExec
                | CommandApi::GoExecCommandShellC
        )
    }

    /// Severity for this API + arg shape combination. See the table in
    /// the module-level doc.
    ///
    /// `literal_text` is the contents of a `StaticLiteral` argument, when
    /// available. For shell-mode APIs, a literal containing shell
    /// metacharacters (`;`, `&&`, `$(`, ...) is a real injection vector
    /// even with no variable, so we bump it to `Medium` (audit B15).
    fn severity_for(self, kind: CommandArgKind, literal_text: Option<&str>) -> Severity {
        // The shell-c form is its own beast: only arg[2] is variable
        // and that's the whole point — Critical.
        if self == CommandApi::GoExecCommandShellC {
            return Severity::Critical;
        }

        // B15: shell-mode API + static-literal command containing
        // metacharacters is a real injection vector, not a benign
        // literal. Bump from Low → Medium.
        if self.is_shell_api() && kind == CommandArgKind::StaticLiteral {
            if let Some(text) = literal_text {
                if !shell_metachars_in(text).is_empty() {
                    return Severity::Medium;
                }
            }
        }

        if self.is_shell_api() {
            match kind {
                CommandArgKind::StaticLiteral => Severity::Low,
                CommandArgKind::StaticList => Severity::Low,
                CommandArgKind::Interpolated | CommandArgKind::UserVariable => Severity::Critical,
                // For shell APIs, lists are unusual but treat as a
                // mixed bag — shell still interprets argv[0].
                CommandArgKind::MixedListVarArgv0 => Severity::Critical,
                CommandArgKind::MixedListLiteralArgv0 => Severity::High,
                CommandArgKind::FunctionLike => Severity::Low,
                CommandArgKind::Unknown => Severity::High,
            }
        } else {
            match kind {
                CommandArgKind::StaticLiteral => Severity::Low,
                CommandArgKind::StaticList => Severity::Low,
                CommandArgKind::Interpolated | CommandArgKind::UserVariable => Severity::High,
                CommandArgKind::MixedListVarArgv0 => Severity::Critical,
                CommandArgKind::MixedListLiteralArgv0 => Severity::High,
                CommandArgKind::FunctionLike => Severity::Low,
                CommandArgKind::Unknown => Severity::High,
            }
        }
    }
}

// ---------------------------------------------------------------------------
// Detector
// ---------------------------------------------------------------------------

/// Detects command-injection sinks (CWE-78).
///
/// AST-eligible files are scanned by `scan_file_ast`; the remaining
/// supported extensions are scanned by `scan_file_line`.
pub struct CommandInjectionDetector {
    /// Repository root. Used to relativize reported paths and passed to
    /// the intra-function taint pass.
    repository_path: PathBuf,
    /// Upper bound on findings collected; scan loops stop once it is
    /// reached. Set to 50 by `new`.
    max_findings: usize,
    /// Taint engine used by `detect()` when no precomputed paths exist.
    taint_analyzer: TaintAnalyzer,
    /// Taint paths computed ahead of time; when set, `detect()` clones
    /// them instead of calling `trace_taint`.
    /// NOTE(review): presumably filled via `impl_taint_precompute!` — confirm.
    precomputed_cross: std::sync::OnceLock<Vec<crate::detectors::taint::TaintPath>>,
    /// Intra-function taint paths computed ahead of time; when set,
    /// `detect()` clones them instead of running the intra-function pass.
    precomputed_intra: std::sync::OnceLock<Vec<crate::detectors::taint::TaintPath>>,
}

impl CommandInjectionDetector {
    /// Creates a detector rooted at `repository_path`.
    ///
    /// The finding cap starts at 50, a fresh `TaintAnalyzer` is
    /// created, and both precomputed taint-path slots start empty.
    pub fn new(repository_path: impl Into<PathBuf>) -> Self {
        let repository_path = repository_path.into();
        let taint_analyzer = TaintAnalyzer::new();
        Self {
            repository_path,
            max_findings: 50,
            taint_analyzer,
            precomputed_cross: std::sync::OnceLock::new(),
            precomputed_intra: std::sync::OnceLock::new(),
        }
    }

    /// Converts `path` into the form stored in `Finding::affected_files`
    /// by delegating to `detector_relative_path` with the repository
    /// root.
    fn relative_path(&self, path: &Path) -> PathBuf {
        let repo_root = &self.repository_path;
        crate::detectors::detector_relative_path(repo_root, path)
    }

    /// AST scanner for Python, JS/TS and Go files.
    ///
    /// Collects every call whose callee is a known command-exec API
    /// (`collect_command_sites`), scores each site from its API and
    /// argument shape, and emits one finding per site until
    /// `max_findings` is reached.
    ///
    /// Returns an empty list when the content contains a NUL byte or
    /// when no cached tree is supplied and parsing fails. Sites whose
    /// line is suppressed (per `is_line_suppressed`, which also sees the
    /// preceding line) are skipped.
    fn scan_file_ast(&self, inputs: &ScanAstInputs<'_>) -> Vec<Finding> {
        let path = inputs.path();
        let content = inputs.content();
        let ext = inputs.ext();
        let lang = inputs.lang;

        // Content with NUL bytes is not scanned.
        if content.contains('\0') {
            return Vec::new();
        }

        // Reuse the caller's tree when one was supplied; otherwise parse
        // here. `parsed` is declared first so a freshly parsed tree
        // outlives the `root` node borrowed from it.
        let parsed;
        let root = if let Some(tree) = inputs.cached_tree {
            tree.root_node()
        } else if let Some(tree) = parse_root_ext(content, lang, ext) {
            parsed = tree;
            parsed.root_node()
        } else {
            return Vec::new();
        };

        let bytes = content.as_bytes();

        // Go import aliases are only collected for Go files.
        let go_aliases = if matches!(lang, Language::Go) {
            collect_go_import_aliases(root, bytes)
        } else {
            GoImportAliases::default()
        };

        // Python alias maps, collected only for Python files:
        //  * from-imports resolve bare calls such as
        //    `from os import system; system(cmd)` (deferred B10 from
        //    `3c88328e`, same pattern as `insecure_crypto`, `32021903`);
        //  * module aliases resolve `import subprocess as sp; sp.run(...)`
        //    by mapping the receiver `sp` back to `subprocess`.
        let (py_aliases, py_module_aliases) = if matches!(lang, Language::Python) {
            (
                super::python_imports::collect_python_from_imports(root, bytes),
                super::python_imports::collect_python_module_aliases(root, bytes),
            )
        } else {
            (HashMap::new(), HashMap::new())
        };

        let walk_ctx = AstWalkCtx {
            lang,
            source: bytes,
        };
        let aliases = super::python_imports::PythonAliases::new(&py_aliases, &py_module_aliases);
        let mut sites: Vec<CommandSite> = Vec::new();
        collect_command_sites(&walk_ctx, root, &go_aliases, &aliases, &mut sites);

        let lines: Vec<&str> = content.lines().collect();
        let mut findings = Vec::new();
        for site in sites {
            if findings.len() >= self.max_findings {
                break;
            }

            let row = site.call_node.start_position().row;
            let current = lines.get(row).copied();

            // Suppression looks at the call's line and the line above it.
            if let Some(line) = current {
                let above = row.checked_sub(1).and_then(|i| lines.get(i).copied());
                if crate::detectors::is_line_suppressed(line, above) {
                    continue;
                }
            }

            let literal = site.target_text.as_deref();
            let severity = site.api.severity_for(site.arg_kind, literal);
            let snippet = current.map(str::trim).unwrap_or("");
            // Tree-sitter rows are 0-based; findings use 1-based lines.
            let line_num = (row + 1) as u32;

            findings.push(self.build_finding(
                path,
                line_num,
                site.api,
                site.arg_kind,
                severity,
                snippet,
                ext,
                literal,
            ));
        }

        findings
    }

    /// Line-based scanner for the non-AST extensions (`.rb`, `.php`,
    /// `.java`, `.sh`).
    ///
    /// These languages have no AST path here, so each line is matched
    /// against the canonical command-exec forms by `match_line_command`.
    /// A higher false-positive rate than the AST path is an accepted
    /// trade-off.
    ///
    /// Files larger than 500 000 bytes are skipped entirely. Suppressed
    /// lines and lines that start (after indentation) with `#` or `//`
    /// are ignored. Scanning stops once `max_findings` is reached.
    fn scan_file_line(&self, inputs: &ScanInputs<'_>) -> Vec<Finding> {
        let content = inputs.content;
        let mut findings = Vec::new();
        if content.len() > 500_000 {
            return findings;
        }
        let (path, ext) = (inputs.path, inputs.ext);

        // Line immediately before the current one; the suppression check
        // consults it.
        let mut previous: Option<&str> = None;
        for (idx, line) in content.lines().enumerate() {
            if findings.len() >= self.max_findings {
                break;
            }
            let above = previous.replace(line);
            if crate::detectors::is_line_suppressed(line, above) {
                continue;
            }

            // Skip whole-line comments: `#` (Ruby, PHP, sh) and `//`
            // (PHP, Java).
            let code = line.trim_start();
            if code.starts_with('#') || code.starts_with("//") {
                continue;
            }

            let Some((api, arg_kind)) = match_line_command(line, ext) else {
                continue;
            };

            // B15 metachar inspection is not plumbed through this path:
            // `match_line_command` yields a kind but not the literal's
            // text, so no literal is passed on. The AST path covers the
            // same check for Python/JS/Go. Tracked as a follow-up.
            let severity = api.severity_for(arg_kind, None);
            let line_num = (idx + 1) as u32;
            findings.push(self.build_finding(
                path,
                line_num,
                api,
                arg_kind,
                severity,
                line.trim(),
                ext,
                None,
            ));
        }
        findings
    }

    /// Assembles the `Finding` reported for one command-exec site.
    ///
    /// * `path` / `line_num` — location; the path is relativized for
    ///   `affected_files` and shown verbatim in the description.
    /// * `api` / `arg_kind` — what was matched and how its argument
    ///   was classified; they drive the title, the argument-shape text
    ///   and the remediation advice.
    /// * `severity` — already decided by the caller.
    /// * `snippet` — source line shown in the fenced code block.
    /// * `ext` — file extension; selects the fence language tag and the
    ///   remediation text.
    /// * `literal_text` — contents of a static-literal argument, when
    ///   known. Under a shell-mode API, metacharacters found in it are
    ///   called out in the title and description (B15).
    ///
    /// The finding's `id` is left empty here.
    fn build_finding(
        &self,
        path: &Path,
        line_num: u32,
        api: CommandApi,
        arg_kind: CommandArgKind,
        severity: Severity,
        snippet: &str,
        ext: &str,
        literal_text: Option<&str>,
    ) -> Finding {
        let api_name = api.callee_label();

        // B15: only a static literal handed to a shell-mode API is
        // inspected for metacharacters; every other combination yields
        // an empty collection.
        let literal_under_shell = api.is_shell_api() && arg_kind == CommandArgKind::StaticLiteral;
        let metachars = match literal_text {
            Some(text) if literal_under_shell => shell_metachars_in(text),
            _ => Default::default(),
        };
        let has_metachars = !metachars.is_empty();

        // Surface the first metacharacter so reviewers see at a glance
        // why the literal is not benign.
        let title = match metachars.first() {
            Some(first) => format!(
                "Potential command injection via {api_name} (shell metacharacter `{first}` in literal)"
            ),
            None => format!("Potential command injection via {api_name}"),
        };

        let arg_desc = match arg_kind {
            CommandArgKind::StaticLiteral if has_metachars => {
                "static string literal containing shell metacharacters (RCE risk)"
            }
            CommandArgKind::StaticLiteral => "static string literal (low risk)",
            CommandArgKind::StaticList => "list of static literals (low risk)",
            CommandArgKind::Interpolated => "string with variable interpolation (RCE risk)",
            CommandArgKind::UserVariable => "user-controlled expression (RCE risk)",
            CommandArgKind::MixedListVarArgv0 => "list whose argv[0] is variable (RCE risk)",
            CommandArgKind::MixedListLiteralArgv0 => {
                "list mixing literals and variables (RCE risk)"
            }
            CommandArgKind::FunctionLike => "function value",
            CommandArgKind::Unknown => "non-static argument",
        };

        // Language tag for the fenced snippet; unknown extensions get an
        // untagged fence.
        let fence_lang = match ext {
            "py" => "python",
            "js" | "jsx" => "javascript",
            "ts" | "tsx" => "typescript",
            "rb" => "ruby",
            "php" => "php",
            "go" => "go",
            "java" => "java",
            "sh" => "bash",
            _ => "",
        };

        let description = format!(
            "**Potential Command Injection (CWE-78)**\n\n\
             **API**: `{}`\n\n\
             **Argument shape**: {}\n\n\
             **Location**: {}:{}\n\n\
             **Code snippet**:\n```{}\n{}\n```\n\n\
             OS-command-execution APIs run their argument as a shell or \
             argv list. When that argument is anything other than a \
             constant the program author controls at write time, \
             attackers who can influence the value get arbitrary \
             command execution.",
            api_name,
            arg_desc,
            path.display(),
            line_num,
            fence_lang,
            snippet,
        );

        let why_it_matters = "Attackers could execute arbitrary system commands by injecting shell \
                              metacharacters or by choosing the binary that runs."
            .to_string();

        Finding {
            id: String::new(),
            detector: "CommandInjectionDetector".to_string(),
            severity,
            title,
            description,
            affected_files: vec![self.relative_path(path)],
            line_start: Some(line_num),
            line_end: Some(line_num),
            suggested_fix: Some(self.recommend(api, ext)),
            estimated_effort: Some("45 minutes".to_string()),
            category: Some("security".to_string()),
            cwe_id: Some("CWE-78".to_string()),
            why_it_matters: Some(why_it_matters),
            ..Default::default()
        }
    }

    /// Returns the remediation text for a finding.
    ///
    /// Arms are tried in order: Python and Go APIs match regardless of
    /// extension; the JS APIs match only for JS/TS extensions; after
    /// that the extension alone selects Ruby, PHP or Java advice; any
    /// remaining combination gets a generic one-liner.
    fn recommend(&self, api: CommandApi, ext: &str) -> String {
        let is_js_ext = matches!(ext, "js" | "ts" | "jsx" | "tsx");

        let advice: &'static str = match (api, ext) {
            (CommandApi::PyOsSystem | CommandApi::PyOsPopen, _) => {
                "Avoid `os.system` / `os.popen` — they spawn a shell and pass the \
                 string through it.\n\n\
                 - Prefer `subprocess.run([\"cmd\", \"arg\"], shell=False)` with a \
                 fixed argv[0] and untrusted input only as later argv elements.\n\
                 - Validate any user-controlled argv[0] against an allowlist."
            }
            (
                CommandApi::PySubprocessShell
                | CommandApi::PySubprocessGetOutput
                | CommandApi::PyCommandsGetOutput,
                _,
            ) => {
                "Avoid `subprocess` with `shell=True` (or `getoutput`, which is \
                 implicit shell=True). Replace with the list form: \
                 `subprocess.run([\"cmd\", \"arg\"], shell=False)`."
            }
            (CommandApi::PySubprocessNoShell, _) => {
                "Use a fixed-string argv[0]. Validate any user-controlled argv[0] \
                 against an allowlist of allowed binaries — even without shell=True \
                 the attacker can otherwise choose which program runs."
            }
            (CommandApi::PyPtySpawn, _) => {
                "Avoid `pty.spawn` on user-controlled command strings. Use a fixed \
                 binary and pass user data as later argv elements."
            }
            (CommandApi::JsChildProcessExec | CommandApi::JsShellJsExec, _) if is_js_ext => {
                "Avoid `child_process.exec` (it always spawns a shell). Use \
                 `child_process.execFile` or `spawn` with `[binary, args]` and \
                 `shell: false`. Never interpolate user input into a command \
                 string."
            }
            (
                CommandApi::JsChildProcessExecFile
                | CommandApi::JsChildProcessSpawn
                | CommandApi::JsChildProcessFork,
                _,
            ) if is_js_ext => {
                "Use a fixed binary path for argv[0]. Validate any \
                 user-controlled argv[0] against an allowlist — even without \
                 `shell: true` the attacker can otherwise choose which program \
                 runs."
            }
            (CommandApi::GoExecCommandShellC, _) => {
                "`exec.Command(\"sh\", \"-c\", userInput)` is a textbook \
                 shell-injection sink. Use `exec.Command(binary, arg1, arg2)` with \
                 a fixed binary and pass user input as later arguments. Never \
                 build a shell command string from user data."
            }
            (CommandApi::GoExecCommand | CommandApi::GoSyscallExec, _) => {
                "Use a fixed binary path for the first argument. Validate any \
                 user-controlled argv[0] against an allowlist of allowed \
                 binaries. Use `filepath.Clean` for paths."
            }
            // From here on only the extension decides.
            (_, "rb") => {
                "Avoid `system` / `exec` / backtick-strings on user input. \
                 Use `Open3.capture2(['cmd', arg])` with an array form."
            }
            (_, "php") => {
                "Avoid `system` / `shell_exec` / `passthru` / `proc_open` / `exec` \
                 on user input. If you must shell out, validate with \
                 `escapeshellarg`/`escapeshellcmd` and use a fixed command."
            }
            (_, "java") => {
                "Use `ProcessBuilder` with an explicit argv list and a \
                 fixed binary; never concatenate user input into the command \
                 string."
            }
            _ => "Avoid passing user-controlled data to OS-command APIs.",
        };

        advice.to_string()
    }
}

impl Detector for CommandInjectionDetector {
    /// Identifier string of this detector: `"command-injection"`.
    fn name(&self) -> &'static str {
        "command-injection"
    }
    /// One-line human-readable summary of what this detector reports.
    fn description(&self) -> &'static str {
        "Detects command injection vulnerabilities (AST-first; CWE-78)"
    }

    /// Always returns `true`.
    ///
    /// NOTE(review): presumably opts this detector out of a shared
    /// post-processing stage — confirm against the `Detector` trait docs.
    fn bypass_postprocessor(&self) -> bool {
        true
    }

    // NOTE(review): presumably expands to the trait hooks that populate
    // `precomputed_cross` / `precomputed_intra`, which `detect` reads
    // below — confirm against the macro definition.
    crate::detectors::impl_taint_precompute!();

    /// Taint category this detector is associated with: command injection.
    fn taint_category(&self) -> Option<crate::detectors::taint::TaintCategory> {
        Some(TaintCategory::CommandInjection)
    }

    /// File extensions this detector handles (`SUPPORTED_EXTS`).
    fn file_extensions(&self) -> &'static [&'static str] {
        SUPPORTED_EXTS
    }

    /// Content flag this detector declares as its requirement: `HAS_EXEC`.
    fn content_requirements(&self) -> crate::detectors::detector_context::ContentFlags {
        crate::detectors::detector_context::ContentFlags::HAS_EXEC
    }

    /// Scan every supported file and return the command-injection findings.
    ///
    /// Steps, in order:
    /// 1. Gather taint paths (precomputed when available, otherwise
    ///    computed here), cross-function and intra-function combined.
    /// 2. Scan each file via the AST path (extensions in `AST_EXTS`) or
    ///    the line path (everything else in `SUPPORTED_EXTS`).
    /// 3. Refine each finding's severity from the first matching taint
    ///    path: sanitized → `Low`, unsanitized → `Critical`.
    /// 4. Promote remaining `High`/`Medium` findings to `Critical` when
    ///    the enclosing function's name looks like a request handler.
    /// 5. Drop every finding whose severity ended up `Low`.
    ///
    /// The order matters: step 4 only looks at findings that step 3 left
    /// at `High`/`Medium`, and step 5 removes what step 3 demoted.
    fn detect(
        &self,
        ctx: &crate::detectors::analysis_context::AnalysisContext,
    ) -> Result<Vec<Finding>> {
        let graph = ctx.graph;
        let files = &ctx.as_file_provider();
        let mut findings: Vec<Finding> = vec![];

        // Taint analysis for command injection: use the precomputed
        // cross-function / intra-function paths when they have been set,
        // otherwise run the analysis here as a fallback.
        let mut taint_paths = if let Some(cross) = self.precomputed_cross.get() {
            cross.clone()
        } else {
            self.taint_analyzer
                .trace_taint(graph, TaintCategory::CommandInjection)
        };
        let intra_paths = if let Some(intra) = self.precomputed_intra.get() {
            intra.clone()
        } else {
            crate::detectors::taint::run_intra_function_taint(
                &self.taint_analyzer,
                graph,
                TaintCategory::CommandInjection,
                &self.repository_path,
            )
        };
        taint_paths.extend(intra_paths);
        let taint_result = TaintAnalysisResult::from_paths(taint_paths);

        for path in files.files_with_extensions(SUPPORTED_EXTS) {
            // The cap is checked once per file, before scanning it, so the
            // total can exceed `max_findings` by the findings of the last
            // file scanned.
            if findings.len() >= self.max_findings {
                break;
            }

            let raw = match files.content(path) {
                Some(c) => c,
                None => continue,
            };
            let raw_str: &str = &raw;

            // Cheap pre-filter: skip files that contain none of the
            // command-exec keywords in `COMMAND_KEYWORD_FINDERS` and no
            // backtick character either (backticks are how Ruby spells a
            // shell-out, so they are checked separately from the table).
            if !contains_any(COMMAND_KEYWORD_FINDERS, raw_str) && !raw_str.contains('`') {
                continue;
            }

            // Skip very large files: anything whose content length
            // exceeds 500_000 is not scanned at all.
            if raw.len() > 500_000 {
                continue;
            }

            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            let has_ast_grammar = AST_EXTS.contains(&ext);

            // AST path when the extension is in `AST_EXTS`, otherwise the
            // line-based path.
            let new_findings = if has_ast_grammar {
                let cached = files.tree(path);
                let lang = Language::from_path(path);
                let scan = ScanInputs::new(path, raw_str, ext);
                let ast_inputs = ScanAstInputs::new(scan, lang, cached.as_deref());
                self.scan_file_ast(&ast_inputs)
            } else {
                let scan = ScanInputs::new(path, raw_str, ext);
                self.scan_file_line(&scan)
            };
            findings.extend(new_findings);
        }

        // Severity refinement via taint analysis. Only the first matching
        // taint path is applied to each finding (`break` below).
        for finding in &mut findings {
            let file_path = finding
                .affected_files
                .first()
                .map(|p| p.to_string_lossy().to_string())
                .unwrap_or_default();
            let line = finding.line_start.unwrap_or(0);
            for taint in &taint_result.paths {
                // NOTE(review): the file test and the line test are
                // independent, so a path whose sink is in this file and
                // whose *source* line number (possibly in another file)
                // equals `line` also matches — confirm this is intended.
                if (taint.sink_file == file_path || taint.source_file == file_path)
                    && (taint.sink_line == line || taint.source_line == line)
                {
                    if taint.is_sanitized {
                        // Demoted to Low; removed by the final `retain`.
                        finding.severity = Severity::Low;
                        finding.description = format!(
                            "{}\n\n**Taint Analysis Note**: A sanitizer function (`{}`) \
                             was found in the data flow path, which may mitigate this \
                             vulnerability.",
                            finding.description,
                            taint.sanitizer.as_deref().unwrap_or("unknown")
                        );
                    } else {
                        finding.severity = Severity::Critical;
                        finding.description = format!(
                            "{}\n\n**Taint Analysis Confirmed**: Data flow analysis \
                             traced a path from user input to this command-execution \
                             sink without sanitization:\n\n`{}`",
                            finding.description,
                            taint.path_string()
                        );
                    }
                    break;
                }
            }
        }

        // Severity boost when the call appears in a request-handler
        // function. Mirrors `eval_detector` B8 (camelCase verb-prefix
        // handler matching). The regex is applied to the original-case
        // name: an HTTP verb prefix immediately followed by an uppercase
        // letter, e.g. `getUser`.
        static HANDLER_VERB_RE: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new(r"^(get|post|put|delete|patch|head|options)[A-Z]").expect("valid regex")
        });
        for finding in &mut findings {
            // Only High/Medium findings are eligible for the boost.
            if !matches!(finding.severity, Severity::High | Severity::Medium) {
                continue;
            }
            if let (Some(file_path), Some(line)) =
                (finding.affected_files.first(), finding.line_start)
            {
                let path_str = file_path.to_string_lossy().to_string();
                let i = graph.interner();
                if let Some(func) = graph.find_function_at(&path_str, line) {
                    let raw_name = func.node_name(i);
                    let name_lower = raw_name.to_lowercase();
                    // Substring tests are case-insensitive (lowercased
                    // name); the verb-prefix regex is case-sensitive.
                    let is_route = name_lower.contains("handler")
                        || name_lower.contains("route")
                        || name_lower.contains("endpoint")
                        || name_lower.contains("view")
                        || name_lower.contains("controller")
                        || name_lower.contains("middleware")
                        || name_lower.contains("request")
                        || name_lower.contains("response")
                        || HANDLER_VERB_RE.is_match(raw_name);
                    if is_route {
                        finding.severity = Severity::Critical;
                    }
                }
            }
        }

        // Drop Low findings (static literals, sanitized paths). Caller-
        // controlled constants are not actionable without context the
        // detector doesn't have.
        findings.retain(|f| f.severity != Severity::Low);

        Ok(findings)
    }
}

impl crate::detectors::RegisteredDetector for CommandInjectionDetector {
    /// Build a detector rooted at `init.repo_path` and hand it back as a
    /// shared `Detector` trait object.
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let detector = Self::new(init.repo_path);
        std::sync::Arc::new(detector)
    }
}

// ---------------------------------------------------------------------------
// Pre-filter
// ---------------------------------------------------------------------------

/// Cheap pre-filter table: the substrings whose presence makes a file
/// worth scanning. A file that contains none of them (and no backtick,
/// see below) is skipped before any parse, avoiding the cost of a full
/// parse on files that can't possibly have a command-injection site.
///
/// Each callee name we match in `match_*_call` MUST be covered by a
/// substring here, otherwise the AST scan will never run on a file
/// containing only that callee.
///
/// Note: Ruby backticks are not part of this table. They are checked
/// separately at the call site in `detect` via
/// ``raw_str.contains('`')``, because a backtick is a single byte and a
/// dedicated ``Finder::new(b"`")`` would be wasteful.
static COMMAND_KEYWORD_FINDERS: &[&LazyLock<memchr::memmem::Finder<'static>>] = &[
    &FIND_OS_SYSTEM,
    &FIND_OS_POPEN,
    &FIND_SUBPROCESS,
    &FIND_CHILD_PROCESS,
    &FIND_EXEC_SYNC,
    &FIND_EXEC_ASYNC,
    &FIND_SPAWN_SYNC,
    &FIND_SHELL_EXEC,
    &FIND_PROC_OPEN,
    &FIND_EXEC_COMMAND,
    &FIND_RUNTIME_GETRUNTIME,
    &FIND_PROCESS_BUILDER,
    &FIND_SHELL_TRUE,
    &FIND_SHELL_TRUE_JS,
    &FIND_EXEC_PAREN,
    &FIND_GETOUTPUT_PAREN,
    &FIND_SYSTEM_PAREN,
    &FIND_PASSTHRU_PAREN,
    &FIND_POPEN_PAREN,
    &FIND_SYSCALL_DOT,
    &FIND_SHELLJS,
    &FIND_PTY_SPAWN,
    &FIND_COMMANDS_GETOUTPUT,
    &FIND_COMMANDS_GETSTATUSOUTPUT,
    &FIND_OS_EXEC_IMPORT,
];

// ---------------------------------------------------------------------------
// AST walking
// ---------------------------------------------------------------------------

/// One command-injection-shaped call site we want to emit.
///
/// Produced by the per-language matchers (`match_python_call`,
/// `match_js_call`, `match_go_call`) and collected by
/// `collect_command_sites`.
struct CommandSite<'a> {
    /// The matched call node (`call` for Python, `call_expression` for
    /// JS/TS and Go) — used for line lookup.
    call_node: tree_sitter::Node<'a>,
    /// Which API was matched.
    api: CommandApi,
    /// Classified shape of the relevant argument.
    arg_kind: CommandArgKind,
    /// Verbatim source text of the classified argument, when it was a
    /// `StaticLiteral`. Used by the severity classifier to inspect the
    /// literal for shell metacharacters (audit B15). `None` for non-
    /// literal shapes (Interpolated, UserVariable, ...) since those
    /// already classify above Low.
    target_text: Option<String>,
}

/// Walk the subtree rooted at `node` and append a `CommandSite` to `out`
/// for every dangerous-API call found.
///
/// Nodes are visited in pre-order (a node before its children, children
/// left to right), which is the same order a recursive walk produces.
///
/// The traversal keeps its own heap-allocated work stack instead of
/// recursing: the scanned files are untrusted input, and with recursion
/// the native stack depth would follow the nesting depth of whatever
/// expression the file contains.
fn collect_command_sites<'a>(
    ctx: &AstWalkCtx<'a>,
    node: tree_sitter::Node<'a>,
    go_aliases: &GoImportAliases,
    py_aliases: &super::python_imports::PythonAliases<'_>,
    out: &mut Vec<CommandSite<'a>>,
) {
    let mut pending: Vec<tree_sitter::Node<'a>> = vec![node];
    while let Some(current) = pending.pop() {
        let matched = match_command_site(current, ctx.source, ctx.lang, go_aliases, py_aliases);
        if let Some(site) = matched {
            out.push(site);
        }

        // Append the children, then flip just that tail so the leftmost
        // child ends up on top of the stack and is visited next.
        let first_child_slot = pending.len();
        let mut cursor = current.walk();
        pending.extend(current.children(&mut cursor));
        pending[first_child_slot..].reverse();
    }
}

/// Dispatch `node` to the matcher for `lang` when its kind is that
/// language's call shape: `call` for Python, `call_expression` for
/// JavaScript / TypeScript and Go. Any other node kind or language
/// yields `None`. AST shapes verified against `/tmp/ast-probe`.
fn match_command_site<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
    lang: Language,
    go_aliases: &GoImportAliases,
    py_aliases: &super::python_imports::PythonAliases<'_>,
) -> Option<CommandSite<'a>> {
    let kind = node.kind();
    match lang {
        Language::Python if kind == "call" => match_python_call(node, source, py_aliases),
        Language::JavaScript | Language::TypeScript if kind == "call_expression" => {
            match_js_call(node, source)
        }
        Language::Go if kind == "call_expression" => match_go_call(node, source, go_aliases),
        _ => None,
    }
}

// ---------------------------------------------------------------------------
// Python
// ---------------------------------------------------------------------------

/// Look up a `(module, name)` pair in the Python command-exec API
/// table. Returns `(api, classified_arg_index)` when the pair names a
/// known API and `None` for anything else.
///
/// For the `subprocess` run/call/Popen/check_* family the API tag
/// depends on the call's arguments: `python_subprocess_shell_true`
/// decides between the shell and no-shell variants.
///
/// Shared by the attribute-call branch (`os.system(...)`) and the
/// bare-identifier branch (`from os import system; system(...)`) of
/// `match_python_call`, so a new table entry covers both call shapes.
fn classify_python_command_callee(
    module: &str,
    name: &str,
    arg_nodes: &[tree_sitter::Node<'_>],
    source: &[u8],
) -> Option<(CommandApi, usize)> {
    // Every API in the table is classified on its first argument.
    const COMMAND_ARG: usize = 0;

    let api = match module {
        "os" => match name {
            "system" => CommandApi::PyOsSystem,
            "popen" | "popen2" | "popen3" | "popen4" => CommandApi::PyOsPopen,
            _ => return None,
        },
        "subprocess" => match name {
            "run" | "call" | "Popen" | "check_output" | "check_call" => {
                if python_subprocess_shell_true(arg_nodes, source) {
                    CommandApi::PySubprocessShell
                } else {
                    CommandApi::PySubprocessNoShell
                }
            }
            "getoutput" | "getstatusoutput" => CommandApi::PySubprocessGetOutput,
            _ => return None,
        },
        "commands" if matches!(name, "getoutput" | "getstatusoutput") => {
            CommandApi::PyCommandsGetOutput
        }
        "pty" if name == "spawn" => CommandApi::PyPtySpawn,
        _ => return None,
    };
    Some((api, COMMAND_ARG))
}

/// Match a Python `call` node against the command-exec API list.
///
/// Returns `None` when the callee is not a known command-exec API, when
/// the call has no arguments, or when no command argument can be found.
///
/// Recognized callee shapes:
///   - `os.system`, `os.popen`, `subprocess.run`, `pty.spawn`,
///     `commands.getoutput`, ... → `attribute` node whose object names
///     the module.
///   - `sp.run(cmd)` after `import subprocess as sp` → `attribute` whose
///     object resolves through `aliases.modules` to the canonical module
///     name.
///   - bare `system(cmd)` → `identifier`, resolved through the
///     from-import map `aliases.imports` to the originating module
///     (closes B10).
///
/// Command argument selection: the first positional argument. If the
/// call passes the command by keyword instead
/// (`subprocess.run(args=cmd, shell=True)`), the value of the keyword
/// naming the command parameter is classified, so such calls are not
/// silently dropped.
fn match_python_call<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
    aliases: &super::python_imports::PythonAliases<'_>,
) -> Option<CommandSite<'a>> {
    let func = node.child_by_field_name("function")?;
    let func = unwrap_callee(func);
    let args = node.child_by_field_name("arguments")?;
    let arg_nodes = collect_named_args(args);

    let (api, classified_arg_index) = match func.kind() {
        "attribute" => {
            let obj = func.child_by_field_name("object")?;
            let attr = func.child_by_field_name("attribute")?;
            let attr_text = node_text(attr, source)?;
            let raw_label = receiver_chain_label(obj, source);
            // Resolve module aliases (`import subprocess as sp`) by
            // looking up the case-sensitive identifier text first,
            // then falling back to the receiver-chain label.
            let obj_text = node_text(obj, source).unwrap_or("");
            let obj_label = aliases
                .modules
                .get(obj_text)
                .or_else(|| aliases.modules.get(raw_label.as_str()))
                .cloned()
                .unwrap_or(raw_label);
            classify_python_command_callee(obj_label.as_str(), attr_text, &arg_nodes, source)?
        }
        "identifier" => {
            // Bare-call: only fires if a `from <module> import <name>`
            // bound this name to one of the known command-exec modules.
            let name = node_text(func, source)?;
            let module = aliases.imports.get(name)?;
            classify_python_command_callee(module.as_str(), name, &arg_nodes, source)?
        }
        _ => return None,
    };

    let first = arg_nodes.get(classified_arg_index).copied()?;
    let target = if first.kind() != "keyword_argument" {
        first
    } else {
        // The call opens with a keyword argument, so arg[0] is not the
        // command. Prefer any non-keyword argument that follows.
        let positional = arg_nodes
            .iter()
            .copied()
            .find(|a| a.kind() != "keyword_argument");
        match positional {
            Some(arg) => arg,
            // Everything was passed by keyword: take the value bound to
            // the command parameter. The names are the documented
            // parameter names of the matched APIs: `args` (subprocess
            // run/call/Popen/check_*), `cmd` (os.popen, getoutput,
            // getstatusoutput), `command` (os.system), `argv` (pty.spawn).
            None => arg_nodes.iter().find_map(|arg| {
                let name = node_text(arg.child_by_field_name("name")?, source)?;
                if matches!(name, "args" | "cmd" | "command" | "argv") {
                    arg.child_by_field_name("value")
                } else {
                    None
                }
            })?,
        }
    };

    let arg_kind = classify_command_arg_python(target, source);
    // For B15 metachar inspection: capture verbatim text of static
    // literals so the severity classifier can spot shell metachars.
    // Surrounding quotes are harmless to the check (none of our
    // metachars overlap with `"` / `'`).
    let target_text = if matches!(arg_kind, CommandArgKind::StaticLiteral) {
        node_text(target, source).map(|s| s.to_string())
    } else {
        None
    };

    Some(CommandSite {
        call_node: node,
        api,
        arg_kind,
        target_text,
    })
}

/// Report whether a `subprocess.*` call's arguments enable the shell,
/// by asking `python_kwarg_truthy` about the `shell` keyword.
///
/// A value that cannot be read as a literal (e.g. `shell=some_var`) is
/// counted as enabled. That matches caller intent and keeps the severity
/// tables aligned between `PySubprocessShell` and `PySubprocessNoShell`
/// for the `Unknown` argv[0] kind.
fn python_subprocess_shell_true(args: &[tree_sitter::Node<'_>], source: &[u8]) -> bool {
    // Answer to use when the `shell=` value is not a recognizable literal.
    const ASSUME_SHELL_WHEN_UNKNOWN: bool = true;
    python_kwarg_truthy(args, "shell", source, ASSUME_SHELL_WHEN_UNKNOWN)
}

/// Classify the shape of a Python command-call argument.
///
/// Mapping by node kind:
///   - `string`: `Interpolated` if it has an `interpolation` child
///     (f-string), otherwise `StaticLiteral`.
///   - `concatenated_string`: `Interpolated` if any part is, otherwise
///     `StaticLiteral`.
///   - `binary_operator`: `Interpolated` if any operand is a variable,
///     interpolated or unknown; otherwise `StaticLiteral`.
///   - `list` / `tuple`: delegated to `classify_list_elements_py`.
///   - `identifier` / `attribute` / `subscript` / `call`: `UserVariable`.
///   - `lambda`: `FunctionLike`.
///   - `parenthesized_expression` / `await`: the wrapped expression.
///   - `conditional_expression`: the stronger of the two result
///     branches; the condition does not count.
///   - anything else: `Unknown`.
#[allow(clippy::only_used_in_recursion)]
fn classify_command_arg_python(node: tree_sitter::Node<'_>, source: &[u8]) -> CommandArgKind {
    match node.kind() {
        "string" => {
            // f-string with interpolation children → Interpolated. Plain
            // string → StaticLiteral.
            let mut cursor = node.walk();
            let has_interpolation = node
                .children(&mut cursor)
                .any(|child| child.kind() == "interpolation");
            if has_interpolation {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "concatenated_string" => {
            let mut cursor = node.walk();
            let any_interpolated = node.children(&mut cursor).any(|child| {
                classify_command_arg_python(child, source) == CommandArgKind::Interpolated
            });
            if any_interpolated {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "binary_operator" => {
            // Only named children are operands; the operator token is
            // anonymous.
            let mut cursor = node.walk();
            let found_var = node
                .children(&mut cursor)
                .filter(|child| child.is_named())
                .any(|operand| {
                    matches!(
                        classify_command_arg_python(operand, source),
                        CommandArgKind::UserVariable
                            | CommandArgKind::Interpolated
                            | CommandArgKind::Unknown
                    )
                });
            if found_var {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "list" | "tuple" => {
            let mut cursor = node.walk();
            let elements: Vec<tree_sitter::Node<'_>> = node
                .children(&mut cursor)
                .filter(|child| child.is_named())
                .collect();
            classify_list_elements_py(&elements, source)
        }
        "identifier" | "attribute" | "subscript" | "call" => CommandArgKind::UserVariable,
        "lambda" => CommandArgKind::FunctionLike,
        "parenthesized_expression" | "await" => {
            // Transparent wrappers: classify the first named child.
            let inner = (0..node.named_child_count()).find_map(|i| node.named_child(i));
            match inner {
                Some(expr) => classify_command_arg_python(expr, source),
                None => CommandArgKind::Unknown,
            }
        }
        "conditional_expression" => {
            // Ternary `a if cond else b`. The grammar gives this node no
            // field names; its named children in source order are
            // [consequence, condition, alternative]. Only the two result
            // branches can become the command, so the condition is left
            // out — otherwise `"ls" if flag else "dir"` would count as
            // variable just because `flag` is an identifier.
            let mut cursor = node.walk();
            let operands: Vec<tree_sitter::Node<'_>> = node
                .children(&mut cursor)
                .filter(|child| child.is_named() && child.kind() != "comment")
                .collect();
            match operands.as_slice() {
                [consequence, _condition, alternative] => {
                    let first = classify_command_arg_python(*consequence, source);
                    let second = classify_command_arg_python(*alternative, source);
                    strongest_arg_kind(
                        strongest_arg_kind(CommandArgKind::StaticLiteral, first),
                        second,
                    )
                }
                // Unexpected shape (e.g. parse-error recovery): stay
                // conservative and let every operand count.
                _ => operands
                    .iter()
                    .fold(CommandArgKind::StaticLiteral, |acc, operand| {
                        strongest_arg_kind(acc, classify_command_arg_python(*operand, source))
                    }),
            }
        }
        _ => CommandArgKind::Unknown,
    }
}

/// Classify the elements of a Python `list`/`tuple` command argument.
///
/// - empty, or every element a static literal → `StaticList`
/// - first element (argv[0]) not a static literal → `MixedListVarArgv0`
/// - literal argv[0] with a non-literal later element →
///   `MixedListLiteralArgv0`
fn classify_list_elements_py(elements: &[tree_sitter::Node<'_>], source: &[u8]) -> CommandArgKind {
    let is_literal = |element: &tree_sitter::Node<'_>| {
        matches!(
            classify_command_arg_python(*element, source),
            CommandArgKind::StaticLiteral
        )
    };

    let Some((argv0, rest)) = elements.split_first() else {
        return CommandArgKind::StaticList;
    };
    if !is_literal(argv0) {
        return CommandArgKind::MixedListVarArgv0;
    }
    if rest.iter().all(is_literal) {
        CommandArgKind::StaticList
    } else {
        CommandArgKind::MixedListLiteralArgv0
    }
}

// ---------------------------------------------------------------------------
// JavaScript / TypeScript
// ---------------------------------------------------------------------------

/// Match a JS/TS `call_expression` against the command-exec API list.
fn match_js_call<'a>(node: tree_sitter::Node<'a>, source: &'a [u8]) -> Option<CommandSite<'a>> {
    let func = node.child_by_field_name("function")?;
    let args = node.child_by_field_name("arguments")?;
    let arg_nodes = collect_named_args(args);
    let func = unwrap_callee(func);

    let api = match func.kind() {
        "identifier" => {
            // Bare `exec`, `execSync`, `spawn`, ... — likely destructured
            // from `child_process`. Treat as the corresponding API.
            match node_text(func, source)? {
                "exec" | "execAsync" => CommandApi::JsChildProcessExec,
                "execSync" => CommandApi::JsChildProcessExec,
                "execFile" | "execFileSync" => CommandApi::JsChildProcessExecFile,
                "spawn" | "spawnSync" => CommandApi::JsChildProcessSpawn,
                "fork" => CommandApi::JsChildProcessFork,
                _ => return None,
            }
        }
        "member_expression" => {
            let obj = func.child_by_field_name("object")?;
            let prop = func.child_by_field_name("property")?;
            let prop_text = node_text(prop, source)?;
            let recv = receiver_chain_label(obj, source);
            // Receiver chain may be `child_process` or any of the
            // common alias receivers. We accept:
            //   - `child_process.X(...)`
            //   - `cp.X(...)` (common alias)
            //   - `this.cp.X(...)` (member-of-member; receiver_chain_label
            //     normalizes to last segment lowercase, e.g. `"cp"`)
            //   - `require('child_process').X(...)` (audit B1)
            //   - `(await import('child_process')).X(...)` (audit B1)
            //
            // Audit B9: `process` was previously accepted as an alias
            // here, but `process.exec` / `process.spawn` are not real
            // Node APIs (the global `process` object has none of these
            // methods), so it produced false positives on user-defined
            // `process` objects. Removed.
            let cp_aliases = matches!(recv.as_str(), "child_process" | "cp" | "childprocess");
            let shelljs_aliases = matches!(recv.as_str(), "shelljs" | "shell" | "sh");
            if cp_aliases {
                match prop_text {
                    "exec" | "execAsync" | "execSync" => CommandApi::JsChildProcessExec,
                    "execFile" | "execFileSync" => CommandApi::JsChildProcessExecFile,
                    "spawn" | "spawnSync" => CommandApi::JsChildProcessSpawn,
                    "fork" => CommandApi::JsChildProcessFork,
                    _ => return None,
                }
            } else if shelljs_aliases && prop_text == "exec" {
                CommandApi::JsShellJsExec
            } else {
                return None;
            }
        }
        _ => return None,
    };

    // Audit B6: `child_process.spawn('cmd', args, { shell: true })` (and
    // execFile/spawnSync/execFileSync with the same options object) is
    // semantically equivalent to `exec` because the platform shell still
    // interprets argv[0] AND any args appended on the same shell line.
    // Promote to the always-shell tag so the severity table treats
    // argv[0] correctly.
    let shell_option_true = matches!(
        api,
        CommandApi::JsChildProcessSpawn | CommandApi::JsChildProcessExecFile
    ) && js_spawn_options_shell_true(&arg_nodes, source);
    let api = if shell_option_true {
        CommandApi::JsChildProcessExec
    } else {
        api
    };

    let first = arg_nodes.first().copied()?;
    let arg_kind_first = classify_command_arg_js(first, source);
    // When shell:true is in effect with a literal command string, the
    // argv array (arg[1]) is concatenated onto the shell line — a
    // variable-elements argv is a shell-injection sink in its own right.
    // Promote the classification accordingly.
    let arg_kind = if shell_option_true && matches!(arg_kind_first, CommandArgKind::StaticLiteral) {
        match arg_nodes.get(1).copied() {
            Some(second) => match classify_command_arg_js(second, source) {
                CommandArgKind::StaticLiteral | CommandArgKind::StaticList => arg_kind_first,
                other => other,
            },
            None => arg_kind_first,
        }
    } else {
        arg_kind_first
    };

    // B15: capture verbatim text of static-literal commands for the
    // shell-metachar severity bump.
    let target_text = if matches!(arg_kind, CommandArgKind::StaticLiteral) {
        node_text(first, source).map(|s| s.to_string())
    } else {
        None
    };

    Some(CommandSite {
        call_node: node,
        api,
        arg_kind,
        target_text,
    })
}

/// Decide whether a JS/TS spawn-style call enables the shell through an
/// options object, i.e. carries `shell: <anything but false>`. The JS
/// counterpart of `python_subprocess_shell_true`.
///
/// Object-literal arguments are examined from last to first. Within an
/// object, the first `pair` whose key is `shell` (written either as a
/// property identifier or as a string) and which has a value settles the
/// answer: `false` only for the `false` literal, `true` for everything
/// else, including values that cannot be resolved here. Computed keys
/// are ignored. Without any such pair the result is `false`.
fn js_spawn_options_shell_true(args: &[tree_sitter::Node<'_>], source: &[u8]) -> bool {
    for options in args.iter().rev().filter(|arg| arg.kind() == "object") {
        let mut cursor = options.walk();
        for pair in options
            .children(&mut cursor)
            .filter(|child| child.kind() == "pair")
        {
            let Some(key) = pair.child_by_field_name("key") else {
                continue;
            };
            // `property_identifier` and `string` keys are readable;
            // `computed_property_name` (and anything else) is skipped.
            let key_text = match key.kind() {
                "property_identifier" => node_text(key, source).map(|s| s.to_string()),
                "string" => js_string_literal_value(key, source),
                _ => None,
            };
            if key_text.as_deref() != Some("shell") {
                continue;
            }
            if let Some(value) = pair.child_by_field_name("value") {
                // Only the literal `false` turns the shell off; an
                // unresolvable value is assumed truthy.
                return value.kind() != "false";
            }
        }
        // No decisive `shell` pair in this object: keep looking at the
        // earlier object arguments in case the options bag is not last.
    }
    false
}

/// Classify the shape of a JS/TS command-call argument.
///
/// Mapping by node kind:
///   - `string`: `StaticLiteral`.
///   - `template_string`: `Interpolated` if it has a
///     `template_substitution` child, otherwise `StaticLiteral`.
///   - `binary_expression`: `Interpolated` if either operand is a
///     variable, interpolated or unknown; otherwise `StaticLiteral`.
///   - `array`: delegated to `classify_list_elements_js`.
///   - identifiers, member/subscript accesses, calls: `UserVariable`.
///   - function forms: `FunctionLike`.
///   - parentheses, `await`, and the TypeScript cast/assertion wrappers:
///     the first named child.
///   - `ternary_expression`: the stronger of consequence and alternative.
///   - anything else: `Unknown`.
#[allow(clippy::only_used_in_recursion)]
fn classify_command_arg_js(node: tree_sitter::Node<'_>, source: &[u8]) -> CommandArgKind {
    match node.kind() {
        "string" => CommandArgKind::StaticLiteral,
        "template_string" => {
            let mut cursor = node.walk();
            let has_substitution = node
                .children(&mut cursor)
                .any(|part| part.kind() == "template_substitution");
            if has_substitution {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "binary_expression" => {
            let has_dynamic_operand = ["left", "right"]
                .iter()
                .filter_map(|field| node.child_by_field_name(*field))
                .any(|operand| {
                    matches!(
                        classify_command_arg_js(operand, source),
                        CommandArgKind::UserVariable
                            | CommandArgKind::Interpolated
                            | CommandArgKind::Unknown
                    )
                });
            if has_dynamic_operand {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "array" => {
            let mut cursor = node.walk();
            let elements: Vec<tree_sitter::Node<'_>> = node
                .children(&mut cursor)
                .filter(|child| child.is_named())
                .collect();
            classify_list_elements_js(&elements, source)
        }
        "identifier" | "member_expression" | "subscript_expression" | "call_expression" => {
            CommandArgKind::UserVariable
        }
        "arrow_function" | "function_expression" | "function" | "function_declaration" => {
            CommandArgKind::FunctionLike
        }
        // Transparent wrappers: look through to the first named child.
        "parenthesized_expression"
        | "await_expression"
        | "as_expression"
        | "type_assertion_expression"
        | "non_null_expression"
        | "satisfies_expression" => {
            let inner = (0..node.named_child_count()).find_map(|i| node.named_child(i));
            match inner {
                Some(expr) => classify_command_arg_js(expr, source),
                None => CommandArgKind::Unknown,
            }
        }
        "ternary_expression" => ["consequence", "alternative"]
            .iter()
            .filter_map(|field| node.child_by_field_name(*field))
            .fold(CommandArgKind::StaticLiteral, |strongest, branch| {
                strongest_arg_kind(strongest, classify_command_arg_js(branch, source))
            }),
        _ => CommandArgKind::Unknown,
    }
}

/// Classify the elements of a JS/TS `array` command argument.
///
/// - empty, or every element a static literal → `StaticList`
/// - first element (argv[0]) not a static literal → `MixedListVarArgv0`
/// - literal argv[0] with a non-literal later element →
///   `MixedListLiteralArgv0`
fn classify_list_elements_js(elements: &[tree_sitter::Node<'_>], source: &[u8]) -> CommandArgKind {
    let is_literal = |element: &tree_sitter::Node<'_>| {
        matches!(
            classify_command_arg_js(*element, source),
            CommandArgKind::StaticLiteral
        )
    };

    let Some((argv0, rest)) = elements.split_first() else {
        return CommandArgKind::StaticList;
    };
    if !is_literal(argv0) {
        return CommandArgKind::MixedListVarArgv0;
    }
    if rest.iter().all(is_literal) {
        CommandArgKind::StaticList
    } else {
        CommandArgKind::MixedListLiteralArgv0
    }
}

// ---------------------------------------------------------------------------
// Go
// ---------------------------------------------------------------------------

/// Match a Go `call_expression` against `exec.Command` /
/// `exec.CommandContext` / `syscall.Exec` / `syscall.StartProcess`.
///
/// The receiver label is resolved through `aliases` first, so a call made
/// through an import alias is matched like the canonical package name.
///
/// Returns `None` when the callee is not a selector expression, is not one
/// of the APIs above, or the call has no argument at the "name" position.
///
/// # Shell-c form
///
/// `exec.Command("sh", "-c", USER_VAR)` is recognized by inspecting the
/// name argument (a literal shell), the argument after it (a literal
/// `-c` / `/c` / `/C` / `-Command`) and the argument after the flag (the
/// script). When the script is neither a static literal nor a function
/// literal the site is reported as `GoExecCommandShellC` even though
/// argv[0] is static, because the dangerous part is the script.
///
/// The shell is identified by the basename of the literal, so
/// `/usr/bin/bash` and `/bin/zsh` are treated like `bash` / `zsh`, and the
/// Windows spellings `cmd`, `cmd.exe`, `powershell`, `powershell.exe` and
/// `pwsh` are all accepted.
fn match_go_call<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
    aliases: &GoImportAliases,
) -> Option<CommandSite<'a>> {
    let func = node.child_by_field_name("function")?;
    if func.kind() != "selector_expression" {
        return None;
    }
    let operand = func.child_by_field_name("operand")?;
    let field = func.child_by_field_name("field")?;
    let raw_operand_label = receiver_chain_label_go(operand, source);
    // Resolve aliases from `import e "os/exec"` etc to their canonical
    // package label so the match arms below stay simple.
    let operand_label = aliases.canonical(&raw_operand_label);
    let field_text = node_text(field, source)?;
    let args = node.child_by_field_name("arguments")?;
    let arg_nodes = collect_named_args(args);

    // `exec.Command(name, args...)`: name is arg[0].
    // `exec.CommandContext(ctx, name, args...)`: name is arg[1].
    // `syscall.Exec(path, argv, envv)`: path is arg[0].
    // `syscall.StartProcess(name, argv, attr)`: name is arg[0].
    let (api_base, name_idx) = match (operand_label, field_text) {
        ("exec", "Command") => (CommandApi::GoExecCommand, 0usize),
        ("exec", "CommandContext") => (CommandApi::GoExecCommand, 1usize),
        ("syscall", "Exec" | "StartProcess") => (CommandApi::GoSyscallExec, 0usize),
        _ => return None,
    };

    // Detect the canonical shell-c form: arg[name_idx] is a literal shell,
    // arg[name_idx + 1] is a literal command flag, and arg[name_idx + 2]
    // (the script handed to the shell) is not static.
    if api_base == CommandApi::GoExecCommand {
        let name_lit = arg_nodes
            .get(name_idx)
            .and_then(|n| go_string_literal_value(*n, source));
        let flag_lit = arg_nodes
            .get(name_idx + 1)
            .and_then(|n| go_string_literal_value(*n, source));
        if let (Some(name), Some(flag), Some(script)) = (
            name_lit.as_deref(),
            flag_lit.as_deref(),
            arg_nodes.get(name_idx + 2),
        ) {
            // Compare on the basename so absolute paths to a shell are
            // recognized regardless of the install prefix. `rsplit` always
            // yields at least one item, so the fallback is never taken.
            let base = name.rsplit(['/', '\\']).next().unwrap_or(name);
            let is_shell = matches!(
                base,
                "sh" | "bash"
                    | "zsh"
                    | "dash"
                    | "ksh"
                    | "cmd"
                    | "cmd.exe"
                    | "powershell"
                    | "powershell.exe"
                    | "pwsh"
            );
            let is_c_flag = matches!(flag, "-c" | "/c" | "/C" | "-Command");
            if is_shell && is_c_flag {
                let kind = classify_command_arg_go(*script, source);
                if !matches!(
                    kind,
                    CommandArgKind::StaticLiteral | CommandArgKind::FunctionLike
                ) {
                    return Some(CommandSite {
                        call_node: node,
                        api: CommandApi::GoExecCommandShellC,
                        arg_kind: kind,
                        // GoExecCommandShellC is always Critical
                        // regardless of metachars; no need to
                        // capture the literal here.
                        target_text: None,
                    });
                }
            }
        }
    }

    // General form: classify the "name" argument.
    let target = arg_nodes.get(name_idx).copied()?;
    let arg_kind = classify_command_arg_go(target, source);
    // B15: capture verbatim text of static-literal commands for the
    // shell-metachar severity bump.
    let target_text = if matches!(arg_kind, CommandArgKind::StaticLiteral) {
        node_text(target, source).map(|s| s.to_string())
    } else {
        None
    };

    Some(CommandSite {
        call_node: node,
        api: api_base,
        arg_kind,
        target_text,
    })
}

/// Return the textual content of a Go `interpreted_string_literal` or
/// `raw_string_literal`; any other node kind yields `None`.
///
/// The text of the first content child
/// (`interpreted_string_literal_content`, `raw_string_literal_content` or
/// `string_content`) is returned when one exists. Otherwise the node's own
/// text is used with one pair of surrounding `"` or backticks removed.
///
/// Audit B5: a `parenthesized_expression` is looked through, so
/// `exec.Command(("sh"), "-c", userInput)` still counts as a shell-c form.
fn go_string_literal_value(node: tree_sitter::Node<'_>, source: &[u8]) -> Option<String> {
    let kind = node.kind();

    // Wrapper node: the answer is whatever the first named child yields.
    if kind == "parenthesized_expression" {
        return (0..node.named_child_count())
            .find_map(|i| node.named_child(i))
            .and_then(|inner| go_string_literal_value(inner, source));
    }
    if !matches!(kind, "interpreted_string_literal" | "raw_string_literal") {
        return None;
    }

    // Preferred path: a dedicated content child.
    let mut walker = node.walk();
    let content = node.children(&mut walker).find(|child| {
        matches!(
            child.kind(),
            "interpreted_string_literal_content" | "raw_string_literal_content" | "string_content"
        )
    });
    if let Some(content) = content {
        return node_text(content, source).map(|text| text.to_string());
    }

    // No content child: peel the delimiters off the literal's own text.
    let raw = node_text(node, source)?;
    let unquoted = match raw.strip_prefix('"').and_then(|rest| rest.strip_suffix('"')) {
        Some(inner) => inner,
        None => raw
            .strip_prefix('`')
            .and_then(|rest| rest.strip_suffix('`'))
            .unwrap_or(raw),
    };
    Some(unquoted.to_string())
}

/// Classify the AST shape of a single Go argument expression.
///
/// * string literals → `StaticLiteral`
/// * identifiers, selectors, index expressions, calls → `UserVariable`
/// * `func` literals → `FunctionLike`
/// * `binary_expression` → `Interpolated` when either operand classifies
///   as `UserVariable`, `Interpolated` or `Unknown`; otherwise
///   `StaticLiteral`
/// * `parenthesized_expression` → classification of its first named child
/// * `composite_literal` → list classification of its elements
/// * anything else → `Unknown`
#[allow(clippy::only_used_in_recursion)]
fn classify_command_arg_go(node: tree_sitter::Node<'_>, source: &[u8]) -> CommandArgKind {
    match node.kind() {
        "interpreted_string_literal" | "raw_string_literal" => CommandArgKind::StaticLiteral,
        "identifier" | "selector_expression" | "index_expression" | "call_expression" => {
            CommandArgKind::UserVariable
        }
        "func_literal" => CommandArgKind::FunctionLike,
        "binary_expression" => {
            // Both operands are always classified; one dynamic side is
            // enough to make the whole expression interpolated.
            let dynamic_operands = ["left", "right"]
                .iter()
                .filter_map(|side| node.child_by_field_name(*side))
                .filter(|operand| {
                    matches!(
                        classify_command_arg_go(*operand, source),
                        CommandArgKind::UserVariable
                            | CommandArgKind::Interpolated
                            | CommandArgKind::Unknown
                    )
                })
                .count();
            if dynamic_operands > 0 {
                CommandArgKind::Interpolated
            } else {
                CommandArgKind::StaticLiteral
            }
        }
        "parenthesized_expression" => (0..node.named_child_count())
            .find_map(|i| node.named_child(i))
            .map_or(CommandArgKind::Unknown, |inner| {
                classify_command_arg_go(inner, source)
            }),
        // Composite-literal `[]string{"a", b}` — treat as list.
        "composite_literal" => {
            let mut items: Vec<tree_sitter::Node<'_>> = Vec::new();
            let mut outer = node.walk();
            // Elements live under the `literal_value` child.
            for body in node
                .children(&mut outer)
                .filter(|child| child.kind() == "literal_value")
            {
                let mut inner = body.walk();
                for entry in body.children(&mut inner).filter(|child| child.is_named()) {
                    match entry.kind() {
                        // Keyed entry: only its `value` field is an element.
                        "keyed_element" => items.extend(entry.child_by_field_name("value")),
                        // `literal_element` wraps the value; take its named children.
                        "literal_element" => items.extend(
                            (0..entry.named_child_count()).filter_map(|j| entry.named_child(j)),
                        ),
                        _ => items.push(entry),
                    }
                }
            }
            classify_list_elements_go(&items, source)
        }
        _ => CommandArgKind::Unknown,
    }
}

/// Classify the elements of a Go composite literal as a whole.
///
/// * empty list, or every element a static literal → `StaticList`
/// * first element (argv[0]) not a static literal → `MixedListVarArgv0`
/// * literal argv[0] followed by at least one non-literal →
///   `MixedListLiteralArgv0`
fn classify_list_elements_go(elements: &[tree_sitter::Node<'_>], source: &[u8]) -> CommandArgKind {
    let Some((head, tail)) = elements.split_first() else {
        return CommandArgKind::StaticList;
    };
    let head_is_literal = matches!(
        classify_command_arg_go(*head, source),
        CommandArgKind::StaticLiteral
    );
    // Every trailing element is classified (no short-circuit); only the
    // number of non-literal ones matters for the verdict.
    let dynamic_tail = tail
        .iter()
        .filter(|arg| {
            !matches!(
                classify_command_arg_go(**arg, source),
                CommandArgKind::StaticLiteral
            )
        })
        .count();
    match (head_is_literal, dynamic_tail) {
        (false, _) => CommandArgKind::MixedListVarArgv0,
        (true, 0) => CommandArgKind::StaticList,
        (true, _) => CommandArgKind::MixedListLiteralArgv0,
    }
}

// `receiver_chain_label_go` lives in `ast_helpers`; imported above.

/// Local names under which a Go file refers to the dangerous packages.
///
/// Audit B2 fix: `import e "os/exec"` followed by `e.Command(b)` used to
/// be missed because the receiver was compared by literal text. The
/// matcher now resolves the receiver through this map instead.
///
/// `Default` yields two empty sets; `collect_go_import_aliases` is what
/// seeds them with the canonical names and any aliases found in the file.
#[derive(Debug, Default, Clone)]
struct GoImportAliases {
    /// Identifier names to treat as the `os/exec` package.
    exec_aliases: std::collections::HashSet<String>,
    /// Identifier names to treat as the `syscall` package.
    syscall_aliases: std::collections::HashSet<String>,
}

impl GoImportAliases {
    /// Map a receiver label to its canonical Go package name.
    ///
    /// Yields `"exec"` when `label` is in `exec_aliases`, `"syscall"` when
    /// it is in `syscall_aliases` (the `exec` set wins if a label is in
    /// both), and `label` itself otherwise. The lookup is an exact string
    /// match against the stored names.
    fn canonical<'s>(&self, label: &'s str) -> &'s str {
        let is_exec = self.exec_aliases.contains(label);
        let is_syscall = self.syscall_aliases.contains(label);
        match (is_exec, is_syscall) {
            (true, _) => "exec",
            (false, true) => "syscall",
            (false, false) => label,
        }
    }
}

/// Walk the Go file root and collect `(alias → canonical-package)`
/// mappings for the dangerous packages (`os/exec`, `syscall`).
///
/// The returned map always contains the canonical names `exec` and
/// `syscall`; every `import_spec` for `os/exec` or `syscall` adds its
/// local name (lowercased) on top. Blank (`_`) and dot (`.`) imports add
/// nothing.
///
/// Import paths may be written as interpreted (`"os/exec"`) or raw
/// (`` `os/exec` ``) string literals; both delimiters are stripped before
/// the path is compared.
fn collect_go_import_aliases(root: tree_sitter::Node<'_>, source: &[u8]) -> GoImportAliases {
    let mut out = GoImportAliases::default();
    // Canonical package names always resolve to themselves.
    out.exec_aliases.insert("exec".to_string());
    out.syscall_aliases.insert("syscall".to_string());

    /// Depth-limited descent that hands every `import_spec` to
    /// `handle_import_spec` and does not look inside it.
    fn visit(node: tree_sitter::Node<'_>, source: &[u8], out: &mut GoImportAliases, depth: u8) {
        // Imports are always at file scope; bail past a reasonable
        // descent depth to avoid walking the whole file.
        if depth > 4 {
            return;
        }
        if node.kind() == "import_spec" {
            handle_import_spec(node, source, out);
            return;
        }
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            visit(child, source, out, depth + 1);
        }
    }

    /// Record the local name of one `import_spec` if it imports `os/exec`
    /// or `syscall`.
    fn handle_import_spec(node: tree_sitter::Node<'_>, source: &[u8], out: &mut GoImportAliases) {
        let Some(path) = node.child_by_field_name("path") else {
            return;
        };
        let Some(raw) = node_text(path, source) else {
            return;
        };
        // Strip the literal's delimiters: `"os/exec"` or the raw-string
        // spelling with backticks, which Go accepts for import paths too.
        let pkg_path = raw
            .strip_prefix('"')
            .and_then(|s| s.strip_suffix('"'))
            .or_else(|| raw.strip_prefix('`').and_then(|s| s.strip_suffix('`')))
            .unwrap_or(raw);
        // Determine the local name. If there's a `name` field, use
        // that; otherwise default to the last segment of the path.
        let local = match node.child_by_field_name("name") {
            Some(n) => node_text(n, source).map(|s| s.to_string()),
            None => pkg_path.rsplit('/').next().map(|s| s.to_string()),
        };
        // `_` and `.` imports introduce no package identifier to match on.
        let local = match local {
            Some(s) if !s.is_empty() && s != "_" && s != "." => s,
            _ => return,
        };
        let local_lower = local.to_lowercase();
        match pkg_path {
            "os/exec" => {
                out.exec_aliases.insert(local_lower);
            }
            "syscall" => {
                out.syscall_aliases.insert(local_lower);
            }
            _ => {}
        }
    }

    visit(root, source, &mut out, 0);
    out
}

// ---------------------------------------------------------------------------
// Generic helpers (mirrored from eval_detector)
// ---------------------------------------------------------------------------

// `collect_named_args` and `unwrap_callee` live in `ast_helpers`; imported
// at the top of the module.

/// Lowercased "receiver label" for a JS/TS member-call receiver.
/// Mirrors `eval_detector::receiver_chain_label`.
///
/// Audit B1 extension: when the receiver is itself a `call_expression`
/// of `require('child_process')` (or `import('child_process')` inside an
/// `await_expression`), return the canonical `"child_process"` label so
/// that `require('child_process').exec(userInput)` is recognised as
/// `child_process.exec(...)`. This is the single most idiomatic Node
/// shape and was silently missed in the AST migration.
///
/// Implementation: delegate to the shared
/// [`receiver_chain_label`](crate::detectors::security::ast_helpers::receiver_chain_label)
/// passing this detector's local [`call_expression_module_label`] as the
/// resolver — that's the only piece that varies between detectors (it
/// names the dangerous module(s) for *this* detector, e.g.
/// `child_process`).
///
/// The `String` produced by the shared helper is returned unchanged.
fn receiver_chain_label(node: tree_sitter::Node<'_>, source: &[u8]) -> String {
    // The third argument is the optional module resolver; this detector
    // always supplies one.
    receiver_chain_label_shared(node, source, Some(&call_expression_module_label))
}

/// Resolve `require('MODULE')` / `import('MODULE')` to a canonical module
/// label, or `None` for any other call shape or any other module.
///
/// The callee must be an `identifier` or `import` node whose text is
/// exactly `require` or `import`, and the first argument must be a plain
/// quoted string. Only `child_process` and `node:child_process` are
/// mapped; both yield `"child_process"`.
///
/// Callers are expected to pass a `call_expression` node (checked in
/// debug builds only).
fn call_expression_module_label(
    node: tree_sitter::Node<'_>,
    source: &[u8],
) -> Option<&'static str> {
    debug_assert_eq!(node.kind(), "call_expression");
    let callee = node.child_by_field_name("function")?;
    let callee_name = node_text(callee, source)?;
    let kind_ok = matches!(callee.kind(), "identifier" | "import");
    let name_ok = matches!(callee_name, "require" | "import");
    if !(kind_ok && name_ok) {
        return None;
    }
    let arg_list = node.child_by_field_name("arguments")?;
    let module = collect_named_args(arg_list)
        .first()
        .and_then(|arg| js_string_literal_value(*arg, source))?;
    // Map known dangerous modules to their canonical label.
    let is_child_process = module == "child_process" || module == "node:child_process";
    is_child_process.then_some("child_process")
}

/// Return the contents of a JS/TS `string` node without its quotes.
///
/// Any node whose kind is not `string` (template strings included)
/// yields `None`. The text of all `string_fragment` children is joined in
/// order when at least one is present; otherwise the node's own text is
/// used with one pair of surrounding `"` or `'` removed, and `None` is
/// returned if neither pair is found.
fn js_string_literal_value(node: tree_sitter::Node<'_>, source: &[u8]) -> Option<String> {
    if node.kind() != "string" {
        return None;
    }

    // tree-sitter-javascript exposes the raw text of a literal through
    // `string_fragment` children.
    let mut cursor = node.walk();
    let fragments: Vec<&str> = node
        .children(&mut cursor)
        .filter(|child| child.kind() == "string_fragment")
        .filter_map(|fragment| node_text(fragment, source))
        .collect();
    if !fragments.is_empty() {
        return Some(fragments.concat());
    }

    // No fragment (e.g. the empty literal `""`): peel the quotes manually.
    let raw = node_text(node, source)?;
    let inner = match raw.strip_prefix('"').and_then(|rest| rest.strip_suffix('"')) {
        Some(double_quoted) => double_quoted,
        None => raw
            .strip_prefix('\'')
            .and_then(|rest| rest.strip_suffix('\''))?,
    };
    Some(inner.to_string())
}

/// Pick the stronger of two `CommandArgKind` signals.
///
/// Strength order, strongest first: `MixedListVarArgv0`, `UserVariable`,
/// `Interpolated`, `MixedListLiteralArgv0`, `Unknown`, `FunctionLike`,
/// then `StaticList` / `StaticLiteral` (equal). On a tie `a` is returned.
fn strongest_arg_kind(a: CommandArgKind, b: CommandArgKind) -> CommandArgKind {
    let weight = |kind: CommandArgKind| -> u8 {
        match kind {
            CommandArgKind::StaticLiteral | CommandArgKind::StaticList => 0,
            CommandArgKind::FunctionLike => 1,
            CommandArgKind::Unknown => 2,
            CommandArgKind::MixedListLiteralArgv0 => 3,
            CommandArgKind::Interpolated => 4,
            CommandArgKind::UserVariable => 5,
            CommandArgKind::MixedListVarArgv0 => 6,
        }
    };
    // `b` only wins when it is strictly stronger.
    if weight(b) > weight(a) {
        b
    } else {
        a
    }
}

// `node_text` lives in `ast_helpers`; imported above.

// ---------------------------------------------------------------------------
// Line scanner (Ruby, PHP, Java, sh)
// ---------------------------------------------------------------------------

/// Line-level recognizer for command execution in the non-AST languages.
///
/// `ext` selects the language (`"rb"`, `"php"`, `"java"`); every other
/// extension yields `None`. On a hit the result is `(api, arg-shape)`,
/// where the API is expressed with an existing `CommandApi` variant
/// chosen as the closest analog for severity purposes.
fn match_line_command(line: &str, ext: &str) -> Option<(CommandApi, CommandArgKind)> {
    static RUBY_BACKTICK_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"`[^`\n]*`").expect("valid regex"));
    // Audit B4: the `(?:^|[^.>])` prefix keeps method calls such as
    // `obj.system(x)` and `$obj->system(x)` from matching as the
    // bare-global `system(x)`.
    static RUBY_SYSTEM_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"(?:^|[^.>])\b(system|exec)\s*\(").expect("valid regex"));
    // Same guard for PHP; this list also covers `popen` (audit B8).
    static PHP_SHELL_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?:^|[^.>])\b(system|shell_exec|passthru|proc_open|popen|exec)\s*\(")
            .expect("valid regex")
    });
    static JAVA_RUNTIME_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(Runtime\.getRuntime\(\)\.exec|new\s+ProcessBuilder)\s*\(")
            .expect("valid regex")
    });

    /// Classify the text that follows the first match of `re` in `line`
    /// (each pattern ends on the opening paren).
    fn arg_after(re: &Regex, line: &str) -> Option<CommandArgKind> {
        re.find(line).map(|hit| classify_line_arg(&line[hit.end()..]))
    }

    match ext {
        "rb" => {
            // A backtick string wins over `system(...)`; `#{...}` inside it
            // marks interpolation.
            if let Some(backticks) = RUBY_BACKTICK_RE.find(line) {
                let shape = if backticks.as_str().contains("#{") {
                    CommandArgKind::Interpolated
                } else {
                    CommandArgKind::StaticLiteral
                };
                return Some((CommandApi::PyOsSystem, shape));
            }
            // Ruby `system`/`exec` are shell-aware in the single-string
            // form, so they are modelled as PyOsSystem (always-shell).
            arg_after(&RUBY_SYSTEM_RE, line).map(|shape| (CommandApi::PyOsSystem, shape))
        }
        "php" => arg_after(&PHP_SHELL_RE, line).map(|shape| {
            let api = if line.contains("shell_exec(") {
                CommandApi::JsShellJsExec
            } else {
                CommandApi::PyOsSystem
            };
            (api, shape)
        }),
        "java" => arg_after(&JAVA_RUNTIME_RE, line).map(|shape| (CommandApi::PyOsSystem, shape)),
        // `"sh"` and every other extension: nothing to report.
        _ => None,
    }
}

/// Cheap line-text classification of a command argument for the
/// non-AST languages.
///
/// `after_paren` is the rest of the line following the call's opening
/// paren. Leading whitespace is ignored, then:
///
/// * `[` or `array(` → `MixedListLiteralArgv0` (contents not inspected)
/// * a `"`/`'` string → `Interpolated` if a double-quoted string contains
///   `#{` or `$`, or if the string is followed by `+` or `.`; otherwise
///   `StaticLiteral`
/// * `)` → `Unknown` (no argument)
/// * anything else, including empty input → `UserVariable`
///
/// A string that is not closed on this line (multi-line literal, or a
/// trailing backslash) is classified from what is visible instead of
/// panicking on an out-of-range slice.
fn classify_line_arg(after_paren: &str) -> CommandArgKind {
    let trimmed = after_paren.trim_start();
    if trimmed.starts_with('[') || trimmed.starts_with("array(") {
        // Array form. We can't easily classify content here; assume
        // mixed shape with literal argv[0].
        return CommandArgKind::MixedListLiteralArgv0;
    }

    let bytes = trimmed.as_bytes();
    let quote = match bytes.first().copied() {
        Some(q @ (b'"' | b'\'')) => q,
        Some(b')') => return CommandArgKind::Unknown,
        // Identifier / member access / superglobal (`$_GET[...]`).
        _ => return CommandArgKind::UserVariable,
    };

    // Scan to the closing quote, skipping backslash escapes.
    let mut i = 1;
    let mut had_interp = false;
    while i < bytes.len() {
        let c = bytes[i];
        if c == b'\\' {
            i += 2;
            continue;
        }
        if c == quote {
            break;
        }
        if quote == b'"' {
            // Ruby `"#{...}"`, PHP `"$var"` and `"{$var}"`.
            let ruby_interp = c == b'#' && bytes.get(i + 1) == Some(&b'{');
            if ruby_interp || c == b'$' {
                had_interp = true;
            }
        }
        i += 1;
    }

    // Then check for trailing `+` or `.` concat — text-level signal that
    // the arg is built from a literal plus a variable. When the loop ran
    // off the end of the line, `i + 1` is past the buffer; the checked
    // `get` turns that into "nothing follows" rather than a panic.
    let after_str = trimmed.get(i + 1..).unwrap_or("");
    let tail = after_str.trim_start();
    let concat = tail.starts_with('+') || tail.starts_with('.');
    if had_interp || concat {
        CommandArgKind::Interpolated
    } else {
        CommandArgKind::StaticLiteral
    }
}

/// Returns the list of shell metacharacters present in a static-literal
/// command string. Used to bump severity for literals that look static
/// but contain command-chaining / substitution operators (audit B15).
///
/// Only the metacharacters that change control-flow or invoke a
/// subshell are listed; quoting and globbing are excluded because they
/// don't, on their own, escalate a literal to an injection vector.
///
/// Each operator is reported at most once, in the fixed order
/// `;`, `&&`, `||`, `|`, `` ` ``, `$(`, `>`, `<(`, `>(`. Operators that
/// share a character are told apart per occurrence: `|` is reported when
/// some pipe is not part of a `||`, and `>` when some `>` is not the start
/// of a `>(`, so `a | b || c` yields both `||` and `|`.
fn shell_metachars_in(s: &str) -> Vec<&'static str> {
    // Byte-wise scanning is safe: every operator is ASCII, and ASCII bytes
    // never occur inside a multi-byte UTF-8 sequence.
    let bytes = s.as_bytes();
    let lone_pipe = bytes.iter().enumerate().any(|(i, &b)| {
        b == b'|' && (i == 0 || bytes[i - 1] != b'|') && bytes.get(i + 1) != Some(&b'|')
    });
    let plain_redirect = bytes
        .iter()
        .enumerate()
        .any(|(i, &b)| b == b'>' && bytes.get(i + 1) != Some(&b'('));

    let checks: [(bool, &'static str); 9] = [
        (s.contains(';'), ";"),
        (s.contains("&&"), "&&"),
        (s.contains("||"), "||"),
        (lone_pipe, "|"),
        (s.contains('`'), "`"),
        (s.contains("$("), "$("),
        (plain_redirect, ">"),
        (s.contains("<("), "<("),
        (s.contains(">("), ">("),
    ];
    checks
        .iter()
        .filter(|(present, _)| *present)
        .map(|(_, op)| *op)
        .collect()
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::base::Detector;
    use crate::graph::builder::GraphBuilder;

    // -------------------------------------------------------------
    // Pre-existing tests (preserved as smoke / shape regression).
    // -------------------------------------------------------------

    #[test]
    fn test_detects_os_system_with_user_input() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: `os.system` fed a literal concatenated with a parameter.
        let source =
            "import os\n\ndef run_command(user_input):\n    os.system(\"ls \" + user_input)\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, vec![("vuln.py", source)]);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();

        assert!(
            !findings.is_empty(),
            "Should detect os.system with user input concatenation"
        );
        let mentions_injection = titles
            .iter()
            .any(|title| title.to_lowercase().contains("command injection"));
        assert!(
            mentions_injection,
            "Finding should mention command injection. Titles: {:?}",
            titles
        );
        let has_cwe = findings
            .iter()
            .any(|f| matches!(f.cwe_id.as_deref(), Some("CWE-78")));
        assert!(has_cwe, "Finding should have CWE-78");
    }

    #[test]
    fn test_no_findings_for_safe_subprocess() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: fully literal argv list, no shell involved.
        let source = "import subprocess\n\ndef list_files():\n    result = subprocess.run([\"ls\", \"-la\"], capture_output=True)\n    return result.stdout\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, vec![("safe.py", source)]);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();

        assert!(
            findings.is_empty(),
            "Safe subprocess usage with list args should have no findings, but got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_subprocess_shell_true_python() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: `subprocess.call` with `shell=True` and a concatenated command.
        let source = "import subprocess\n\ndef execute(user_input):\n    subprocess.call(\"grep \" + user_input, shell=True)\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, vec![("run.py", source)]);
        let findings = detector.detect(&ctx).expect("detection should succeed");

        assert!(
            !findings.is_empty(),
            "Should detect subprocess.call with shell=True and user input"
        );
        let has_cwe = findings
            .iter()
            .any(|f| matches!(f.cwe_id.as_deref(), Some("CWE-78")));
        assert!(has_cwe, "Finding should have CWE-78");
    }

    #[test]
    fn test_detects_child_process_exec_with_template_js() {
        use crate::detectors::analysis_context::AnalysisContext;

        // JS: `child_process.exec` called with an interpolated template literal.
        let source = "const { exec } = require('child_process');\n\nfunction runCommand(req, res) {\n    const userId = req.params.id;\n    child_process.exec(`find /data -user ${userId}`);\n}\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, vec![("handler.js", source)]);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();

        assert!(
            !findings.is_empty(),
            "Should detect child_process.exec with template literal interpolation"
        );
        let mentions_injection = titles
            .iter()
            .any(|title| title.to_lowercase().contains("command injection"));
        assert!(
            mentions_injection,
            "Finding should mention command injection. Titles: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_exec_in_comment() {
        use crate::detectors::analysis_context::AnalysisContext;

        // JS: the only mention of a dangerous API sits inside a comment.
        let source = "// Dangerous example: os.system(user_input) - never do this\nfunction safeFunc() {\n    return 42;\n}\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, vec![("safe.js", source)]);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();

        assert!(
            findings.is_empty(),
            "os.system in a comment should not produce findings, but got: {:?}",
            titles
        );
    }

    #[test]
    fn test_detects_go_exec_command() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Go: `exec.Command` whose program name comes from `r.FormValue`.
        let source = "package main\n\nimport (\n\t\"os/exec\"\n\t\"net/http\"\n)\n\nfunc runCmd(w http.ResponseWriter, r *http.Request) {\n\tcmd := r.FormValue(\"command\")\n\texec.Command(cmd)\n}\n";
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, vec![("handler.go", source)]);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();

        assert!(
            !findings.is_empty(),
            "Should detect exec.Command with user input from r.FormValue. Got: {:?}",
            titles
        );
        let mentions_api = titles
            .iter()
            .any(|title| title.to_lowercase().contains("exec.command"));
        assert!(
            mentions_api,
            "Finding should mention exec.Command. Titles: {:?}",
            titles
        );
    }

    // =================================================================
    // Cohort 1 — currently-passing-shape (must remain green post-migration)
    // =================================================================

    #[test]
    fn test_detects_subprocess_run_with_shell_true_python() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: bare variable passed to `subprocess.run` with `shell=True`.
        let files = vec![(
            "vuln.py",
            "import subprocess\n\ndef run(user_input):\n    subprocess.run(user_input, shell=True)\n",
        )];
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, files);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();

        assert!(
            !findings.is_empty(),
            "Should detect subprocess.run with shell=True and user input. Got: {:?}",
            titles
        );
        let has_cwe = findings
            .iter()
            .any(|f| matches!(f.cwe_id.as_deref(), Some("CWE-78")));
        assert!(has_cwe, "Finding should carry CWE-78");
    }

    #[test]
    fn test_detects_os_system_python() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Python: bare variable passed straight to `os.system`.
        let files = vec![(
            "vuln.py",
            "import os\n\ndef run(user_input):\n    os.system(user_input)\n",
        )];
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, files);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();

        assert!(
            !findings.is_empty(),
            "Should detect os.system(user_input). Got: {:?}",
            titles
        );
    }

    /// `child_process.exec(req.body.cmd)` in a JavaScript handler must
    /// produce at least one finding.
    #[test]
    fn test_detects_child_process_exec_javascript() {
        let source = concat!(
            "const child_process = require('child_process');\n",
            "\n",
            "function run(req, res) {\n",
            "    child_process.exec(req.body.cmd);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "Should detect child_process.exec(req.body.cmd). Got: {:?}",
            titles
        );
    }

    /// Runs the detector over a Ruby backtick command containing
    /// `#{...}` interpolation and checks only that detection completes
    /// without error. The findings themselves are intentionally not
    /// asserted on.
    #[test]
    fn test_detects_ruby_backtick_with_interpolation() {
        let source = concat!(
            "def list_files(user_path)\n",
            "  result = `ls #{user_path}`\n",
            "  result\n",
            "end\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("vuln.rb", source)],
        );
        // Forward-compatible regression guard — Ruby is line-path only, so
        // the result is bound but deliberately left unchecked.
        let _unasserted = scanner.detect(&analysis).expect("detection should succeed");
    }

    /// Runs the detector over PHP `system($_GET['cmd'])` and checks only
    /// that detection completes without error. The findings are not
    /// asserted on.
    #[test]
    fn test_detects_php_system_with_user_input() {
        let source = concat!(
            "<?php\n",
            "function run() {\n",
            "    system($_GET['cmd']);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("vuln.php", source)],
        );
        // The result is bound but deliberately left unchecked.
        let _unasserted = scanner.detect(&analysis).expect("detection should succeed");
    }

    /// A `subprocess.run(..., shell=True)` that appears only inside a
    /// Python `#` comment must produce no findings.
    #[test]
    fn test_skips_command_in_comment() {
        let source = concat!(
            "import subprocess\n",
            "\n",
            "def safe(x):\n",
            "    # subprocess.run(thing, shell=True)\n",
            "    return x\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("safe.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.is_empty(),
            "subprocess.run inside a comment must not fire. Got: {:?}",
            titles
        );
    }

    // =================================================================
    // Cohort 2 — audit-shape (line scanner may FP / FN; AST migration must fix)
    // =================================================================

    /// `subprocess.run(["git", "status"])` — a list made only of string
    /// literals — must produce no findings.
    #[test]
    fn test_skips_subprocess_run_with_static_list_args() {
        let source = concat!(
            "import subprocess\n",
            "\n",
            "def list_files():\n",
            "    subprocess.run([\"git\", \"status\"])\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("safe.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.is_empty(),
            "subprocess.run with all-literal list args is safe. Got: {:?}",
            titles
        );
    }

    /// `subprocess.run([user_binary, "--flag"])` — a parameter in the
    /// first list slot — must produce at least one finding.
    #[test]
    fn test_detects_subprocess_run_with_user_in_list_first_position() {
        let source = concat!(
            "import subprocess\n",
            "\n",
            "def run(user_binary):\n",
            "    subprocess.run([user_binary, \"--flag\"])\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("vuln.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "argv[0] user-controlled in subprocess.run list must fire. Got: {:?}",
            titles
        );
    }

    /// A Python string literal that merely mentions `subprocess.run()`
    /// must produce no findings.
    #[test]
    fn test_skips_string_literal_mentioning_subprocess() {
        let source = concat!(
            "def doc():\n",
            "    msg = \"Use subprocess.run() to call commands\"\n",
            "    return msg\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("docs.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.is_empty(),
            "subprocess.run inside a string literal must not fire. Got: {:?}",
            titles
        );
    }

    /// A Python class that defines a method named `exec` (and never calls
    /// a command API) must produce no findings.
    #[test]
    fn test_skips_exec_as_method_name() {
        let source = concat!(
            "class Runner:\n",
            "    def exec(self, cmd):\n",
            "        return cmd\n",
            "\n",
            "r = Runner()\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("safe.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.is_empty(),
            "exec as a method-name definition must not fire. Got: {:?}",
            titles
        );
    }

    /// `subprocess.run("ls " + user_dir, shell=True)` — a literal
    /// concatenated with a parameter — must produce at least one finding.
    #[test]
    fn test_detects_subprocess_with_concatenation() {
        let source = concat!(
            "import subprocess\n",
            "\n",
            "def run(user_dir):\n",
            "    subprocess.run(\"ls \" + user_dir, shell=True)\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("vuln.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "subprocess.run with `+`-concatenated tainted arg + shell=True must fire. Got: {:?}",
            titles
        );
    }

    // =================================================================
    // Cohort 3 — audit-pending (resolved by the AST migration)
    // =================================================================

    /// `subprocess.run(f"ls {user_dir}", shell=True)` — an f-string that
    /// interpolates a parameter — must produce at least one finding.
    #[test]
    fn test_detects_python_fstring_in_subprocess() {
        let source = concat!(
            "import subprocess\n",
            "\n",
            "def run(user_dir):\n",
            "    subprocess.run(f\"ls {user_dir}\", shell=True)\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("vuln.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "f-string with user_dir interpolation must fire. Got: {:?}",
            titles
        );
    }

    /// `child_process.exec(`ls ${userDir}`)` — a template literal that
    /// interpolates a parameter — must produce at least one finding.
    #[test]
    fn test_detects_js_template_literal_in_exec() {
        let source = concat!(
            "const child_process = require('child_process');\n",
            "\n",
            "function run(userDir) {\n",
            "    child_process.exec(`ls ${userDir}`);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "template-literal interpolation in exec must fire. Got: {:?}",
            titles
        );
    }

    /// A file with both `os.system(user_input)` (line 4) and
    /// `os.system("date")` (line 5) must:
    ///
    /// 1. report at least one `Critical` finding,
    /// 2. report nothing outside `{Critical, Low}`, and
    /// 3. never report the static-literal call on line 5 as `Critical`.
    ///
    /// Check 3 is needed because check 2 alone still passes when *both*
    /// calls are reported as `Critical`.
    #[test]
    fn test_severity_critical_for_user_input_low_for_static_literal() {
        let content =
            "import os\n\ndef run(user_input):\n    os.system(user_input)\n    os.system(\"date\")\n";
        // 1-based line of `os.system("date")` within `content`.
        let static_literal_line = 5;
        let store = GraphBuilder::new().freeze();
        let detector = CommandInjectionDetector::new("/mock/repo");
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &store,
            vec![("mixed.py", content)],
        );
        let findings = detector.detect(&ctx).expect("detection should succeed");
        let critical_count = findings
            .iter()
            .filter(|f| f.severity == Severity::Critical)
            .count();
        let low_count = findings
            .iter()
            .filter(|f| f.severity == Severity::Low)
            .count();
        assert!(
            critical_count >= 1,
            "Expected >=1 Critical for tainted os.system. Got: {:?}",
            findings
                .iter()
                .map(|f| (&f.title, &f.severity))
                .collect::<Vec<_>>()
        );
        // Every finding must be Critical or Low (no Medium/High noise).
        assert!(
            findings.len() <= critical_count + low_count,
            "Static-literal os.system should be Low or filtered, not Critical. Got: {:?}",
            findings
                .iter()
                .map(|f| (&f.title, &f.severity))
                .collect::<Vec<_>>()
        );
        // Pin the static-literal call specifically: the count-based check
        // above cannot tell which call produced a Critical finding.
        assert!(
            !findings
                .iter()
                .any(|f| f.line_start == Some(static_literal_line)
                    && f.severity == Severity::Critical),
            "Static-literal os.system(\"date\") on line 5 must not be Critical. Got: {:?}",
            findings
                .iter()
                .map(|f| (f.line_start, &f.title, &f.severity))
                .collect::<Vec<_>>()
        );
    }

    /// Two Go handlers — `exec.Command(userBin, "--flag")` and
    /// `exec.Command("sh", "-c", userInput)` — must together produce at
    /// least two findings.
    #[test]
    fn test_detects_go_exec_command_with_user_arg() {
        let source = concat!(
            "package main\n",
            "\n",
            "import (\n",
            "\t\"os/exec\"\n",
            "\t\"net/http\"\n",
            ")\n",
            "\n",
            "func runOne(r *http.Request) {\n",
            "\tuserBin := r.FormValue(\"bin\")\n",
            "\texec.Command(userBin, \"--flag\")\n",
            "}\n",
            "\n",
            "func runTwo(r *http.Request) {\n",
            "\tuserInput := r.FormValue(\"cmd\")\n",
            "\texec.Command(\"sh\", \"-c\", userInput)\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.go", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.len() >= 2,
            "Both exec.Command call shapes must fire. Got: {:?}",
            titles
        );
    }

    // =================================================================
    // Self-audit regression tests (B1–B11)
    // =================================================================

    /// B1 (CRITICAL): an inline `require('child_process').exec(req.body.cmd)`
    /// call, with no intermediate binding, must produce a finding.
    #[test]
    fn test_b1_require_child_process_exec_detected() {
        let source = concat!(
            "function run(req, res) {\n",
            "    require('child_process').exec(req.body.cmd);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "require('child_process').exec(...) must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B1 (CRITICAL): an inline `require('child_process').execSync(userInput)`
    /// call must produce a finding.
    #[test]
    // The test name mirrors the camelCase JS API (`execSync`) it covers.
    #[allow(non_snake_case)]
    fn test_b1_require_child_process_execSync_detected() {
        let source = concat!(
            "function run(userInput) {\n",
            "    require('child_process').execSync(userInput);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "require('child_process').execSync(...) must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B1 (CRITICAL): a dynamic-import receiver,
    /// `(await import('child_process')).exec(userInput)`, must produce a
    /// finding.
    #[test]
    fn test_b1_await_import_child_process_exec_detected() {
        let source = concat!(
            "async function run(userInput) {\n",
            "    (await import('child_process')).exec(userInput);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "(await import('child_process')).exec(...) must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B2/B3 (HIGH): with `import e "os/exec"`, the call `e.Command(b)`
    /// on a function parameter must produce a finding.
    #[test]
    fn test_b2_go_aliased_exec_command_detected() {
        let source = concat!(
            "package main\n",
            "\n",
            "import e \"os/exec\"\n",
            "\n",
            "func handler(b string) {\n",
            "\te.Command(b)\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.go", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "Aliased exec.Command via `import e \"os/exec\"` must fire. Got: {:?}",
            titles
        );
    }

    /// B2/B3 (HIGH): with `x "os/exec"` in a grouped import, the call
    /// `x.CommandContext(ctx, b)` must produce a finding.
    #[test]
    fn test_b2_go_aliased_exec_commandcontext_detected() {
        let source = concat!(
            "package main\n",
            "\n",
            "import (\n",
            "\tx \"os/exec\"\n",
            "\t\"context\"\n",
            ")\n",
            "\n",
            "func handler(ctx context.Context, b string) {\n",
            "\tx.CommandContext(ctx, b)\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.go", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "Aliased exec.CommandContext must fire. Got: {:?}",
            titles
        );
    }

    /// B4 (MEDIUM): PHP method calls on an object (`$obj->system(...)`,
    /// `$obj->exec(...)`, `$obj->shell_exec(...)`) are not the global
    /// functions and must produce no findings.
    #[test]
    fn test_b4_php_object_method_does_not_fire() {
        let source = concat!(
            "<?php\n",
            "function run($obj, $input) {\n",
            "    $obj->system($input);\n",
            "    $obj->exec($input);\n",
            "    $obj->shell_exec($input);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("safe.php", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.is_empty(),
            "Object-method `$obj->system($x)` must not fire. Got: {:?}",
            titles
        );
    }

    /// B4 (MEDIUM): Ruby calls with an explicit receiver
    /// (`obj.system(arg)`, `obj.exec(arg)`) are ordinary method calls,
    /// not `Kernel#system`, and must produce no findings.
    #[test]
    fn test_b4_ruby_object_method_does_not_fire() {
        let source = concat!(
            "def run(obj, arg)\n",
            "  obj.system(arg)\n",
            "  obj.exec(arg)\n",
            "end\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("safe.rb", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.is_empty(),
            "Object-method `obj.system(x)` must not fire. Got: {:?}",
            titles
        );
    }

    /// B5 (MEDIUM): wrapping the shell name in parentheses,
    /// `exec.Command(("sh"), "-c", userInput)`, must still yield at least
    /// one `Critical` finding.
    #[test]
    fn test_b5_go_shell_c_with_parenthesized_name_detected() {
        let source = concat!(
            "package main\n",
            "\n",
            "import (\n",
            "\t\"os/exec\"\n",
            "\t\"net/http\"\n",
            ")\n",
            "\n",
            "func handler(r *http.Request) {\n",
            "\tuserInput := r.FormValue(\"cmd\")\n",
            "\texec.Command((\"sh\"), \"-c\", userInput)\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.go", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let has_critical = reported
            .iter()
            .any(|finding| finding.severity == Severity::Critical);
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (&finding.title, &finding.severity))
            .collect();
        assert!(
            has_critical,
            "Parenthesized shell name in shell-c form must produce Critical. Got: {:?}",
            summary
        );
    }

    /// B6 (MEDIUM): `child_process.spawn('cmd', userArgs, { shell: true })`
    /// must produce at least one finding even though the command name is
    /// a static literal.
    #[test]
    fn test_b6_js_spawn_with_shell_true_option_boosted() {
        let source = concat!(
            "const child_process = require('child_process');\n",
            "function run(userArgs) {\n",
            "    child_process.spawn('cmd', userArgs, { shell: true });\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        // With `shell: true`, spawn behaves like exec: the shell still
        // interprets the arguments, so a literal command name is not
        // enough to make the call safe. A non-empty result is taken to
        // mean the finding was not filtered out as Low.
        assert!(
            !reported.is_empty(),
            "spawn(..., {{ shell: true }}) must boost severity above Low. Got: {:?}",
            titles
        );
    }

    /// B6 (MEDIUM): `child_process.execFile('cmd', userArgs, { shell: true })`
    /// must produce at least one finding even though the command name is
    /// a static literal.
    #[test]
    fn test_b6_js_execfile_with_shell_true_option_boosted() {
        let source = concat!(
            "const child_process = require('child_process');\n",
            "function run(userArgs) {\n",
            "    child_process.execFile('cmd', userArgs, { shell: true });\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("handler.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "execFile(..., {{ shell: true }}) must boost severity above Low. Got: {:?}",
            titles
        );
    }

    /// B8 (MEDIUM): PHP `popen($_GET['cmd'], 'r')` — a superglobal passed
    /// as the command — must produce a finding.
    #[test]
    fn test_b8_php_popen_with_user_input_detected() {
        let source = concat!(
            "<?php\n",
            "function run() {\n",
            "    popen($_GET['cmd'], 'r');\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("vuln.php", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            !reported.is_empty(),
            "popen($_GET['cmd'], 'r') must produce a finding. Got: {:?}",
            titles
        );
    }

    /// B9 (LOW): a user-defined `process.exec(input)` is not a Node
    /// command-execution API, so a receiver named `process` must produce
    /// no findings.
    #[test]
    fn test_b9_process_exec_does_not_fire_as_child_process() {
        let source = concat!(
            "class Runner {}\n",
            "const process = new Runner();\n",
            "process.exec = function(x){};\n",
            "function run(input) {\n",
            "    process.exec(input);\n",
            "}\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("safe.js", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let titles: Vec<_> = reported.iter().map(|finding| &finding.title).collect();
        assert!(
            reported.is_empty(),
            "`process.exec(x)` is not a real Node API and must not fire. Got: {:?}",
            titles
        );
    }

    // ----- Python from-import alias resolution (closes deferred B10) -----

    /// After `from os import system`, the bare call `system(user_input)`
    /// on line 4 must be reported. This shape was previously missed
    /// because the matcher only inspected `attribute` callees. Mirrors
    /// `insecure_crypto`'s `test_python_bare_md5_after_from_import`.
    #[test]
    fn test_python_bare_system_after_from_import() {
        let source = concat!(
            "from os import system\n",
            "\n",
            "def run(user_input):\n",
            "    system(user_input)\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("h.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let fires_on_call_line = reported
            .iter()
            .any(|finding| finding.line_start == Some(4));
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (finding.line_start, &finding.title))
            .collect();
        assert!(
            fires_on_call_line,
            "Should fire on `system(user_input)` after `from os import system`. Got: {:?}",
            summary
        );
    }

    /// After `from subprocess import run`, the bare call
    /// `run(user_input, shell=True)` on line 4 must be reported with
    /// `Critical` severity.
    #[test]
    fn test_python_bare_subprocess_run_after_from_import() {
        let source = concat!(
            "from subprocess import run\n",
            "\n",
            "def go(user_input):\n",
            "    run(user_input, shell=True)\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("h.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let critical_on_call_line = reported.iter().any(|finding| {
            finding.line_start == Some(4) && finding.severity == Severity::Critical
        });
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (finding.line_start, finding.severity, &finding.title))
            .collect();
        assert!(
            critical_on_call_line,
            "Should fire Critical on `run(user_input, shell=True)` after `from subprocess import run`. Got: {:?}",
            summary
        );
    }

    /// Audit shape: `import subprocess as sp; sp.run(user, shell=True)`.
    ///
    /// The callee is an attribute call whose object text is `"sp"`
    /// rather than `"subprocess"`, so a
    /// `classify_python_command_callee("sp", "run", ...)` lookup finds
    /// no entry unless the module alias is resolved first. This is the
    /// `import M as N` counterpart of
    /// `test_python_bare_subprocess_run_after_from_import`; the call on
    /// line 4 must be reported as `Critical`.
    #[test]
    fn test_python_aliased_module_subprocess_run_detected() {
        let source = concat!(
            "import subprocess as sp\n",
            "\n",
            "def go(user_input):\n",
            "    sp.run(user_input, shell=True)\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("h.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection should succeed");
        let critical_on_call_line = reported.iter().any(|finding| {
            finding.line_start == Some(4) && finding.severity == Severity::Critical
        });
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (finding.line_start, finding.severity, &finding.title))
            .collect();
        assert!(
            critical_on_call_line,
            "Should fire Critical on `sp.run(user_input, shell=True)` after `import subprocess as sp`. Got: {:?}",
            summary
        );
    }

    // -------------------------------------------------------------
    // B15: shell metacharacters in static-literal commands.
    // -------------------------------------------------------------

    /// `os.system("ls; rm -rf /")` is a static literal, but it contains
    /// the `;` shell metacharacter, so at least one `Medium` finding is
    /// expected rather than the call being skipped as a Low literal.
    #[test]
    fn test_b15_static_literal_with_semicolon_chain_python_os_system() {
        let source = concat!(
            "import os\n",
            "os.system(\"ls; rm -rf /\")\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("a.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection");
        let has_medium = reported
            .iter()
            .any(|finding| finding.severity == Severity::Medium);
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (&finding.title, finding.severity))
            .collect();
        assert!(
            has_medium,
            "B15: static literal with `;` must fire Medium. Got: {:?}",
            summary
        );
    }

    /// `subprocess.run("cat foo | grep bar", shell=True)` is a static
    /// literal containing the `|` pipe metacharacter, so at least one
    /// `Medium` finding is expected.
    #[test]
    fn test_b15_static_literal_with_pipe_python_subprocess_shell_true() {
        let source = concat!(
            "import subprocess\n",
            "subprocess.run(\"cat foo | grep bar\", shell=True)\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("a.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection");
        let has_medium = reported
            .iter()
            .any(|finding| finding.severity == Severity::Medium);
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (&finding.title, finding.severity))
            .collect();
        assert!(
            has_medium,
            "B15: static literal with `|` must fire Medium. Got: {:?}",
            summary
        );
    }

    /// `os.system("echo $(date)")` is a static literal containing a
    /// `$(...)` command substitution, so at least one `Medium` finding is
    /// expected.
    #[test]
    fn test_b15_static_literal_with_dollar_paren_subst_python() {
        let source = concat!(
            "import os\n",
            "os.system(\"echo $(date)\")\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("a.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection");
        let has_medium = reported
            .iter()
            .any(|finding| finding.severity == Severity::Medium);
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (&finding.title, finding.severity))
            .collect();
        assert!(
            has_medium,
            "B15: static literal with `$(` must fire Medium. Got: {:?}",
            summary
        );
    }

    /// `os.system("date")` has no shell metacharacters, so no finding at
    /// `Medium` severity or above may be reported. (The existing Low
    /// classification is filtered by the post-processor.)
    #[test]
    fn test_b15_static_literal_no_metachar_still_low_or_skipped() {
        let source = concat!(
            "import os\n",
            "os.system(\"date\")\n",
        );
        let graph = GraphBuilder::new().freeze();
        let scanner = CommandInjectionDetector::new("/mock/repo");
        let analysis = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("a.py", source)],
        );
        let reported = scanner.detect(&analysis).expect("detection");
        let summary: Vec<_> = reported
            .iter()
            .map(|finding| (&finding.title, finding.severity))
            .collect();
        let medium_or_above = reported
            .iter()
            .filter(|finding| finding.severity >= Severity::Medium)
            .count();
        assert_eq!(
            medium_or_above,
            0,
            "Static literal `date` (no metachar) must not fire Medium+. Got: {:?}",
            summary
        );
    }
}