repotoire 0.8.0

//! Dual-branch predictor for Python command-injection sites.
//!
//! Implements decisions D1 (weights) and D3 (severity) from
//! `docs/superpowers/specs/2026-05-09-dual-branch-phase2-command-injection-decisions.md`.
//!
//! # What this module does
//!
//! Given a Python call site for a command-exec API
//! (`os.system`, `subprocess.run`, `subprocess.Popen`, ...), produce a
//! [`Prediction`] that:
//!
//! 1. Picks `RealBug` or `Benign` as the predicted branch.
//! 2. Carries the other branch as the alternative.
//! 3. Lists typed [`PredictionReason`]s the predictor used.
//! 4. Optionally lists [`ResolutionSignal`]s (collapsing or hint-grade).
//!
//! # Pipeline
//!
//! ```text
//! call_node ──extract_evidence──> Evidence ──predict──> Prediction
//! ```
//!
//! `extract_evidence` is pure AST traversal; `predict` is pure scoring.
//!
//! # Sign convention
//!
//! `weight > 0` leans **Benign**; `weight < 0` leans **RealBug**.
//!
//! # Severity mapping (D3 deviation from 2a/2b)
//!
//! - Predicted **RealBug** → `CommandApi::severity_for(api, arg_kind,
//!   literal_text)` — the existing 2D severity table at `mod.rs:291`.
//!   This preserves the B6 (shell-c always Critical) and B15
//!   (shell-mode literal-with-metachars Medium) audit calibration that
//!   a flat `High` mapping would erase.
//! - Predicted **Benign** → `Severity::Info`.
//! - Alternative branch carries the opposite label's severity.
//!
//! # Resolution signals (collapsing)
//!
//! Two annotations fully collapse the prediction:
//!
//! - `# repotoire: command-static[<reason>]` → `Benign` (Info).
//! - `# repotoire: command-user-controlled[<source>]` → `RealBug`
//!   (severity from the 2D table).
//!
//! Both are surfaced as [`ResolutionSignal`]s so the developer who
//! disagrees with the predictor has a documented mechanism for forcing
//! the desired branch.
//!
//! # Why these weights
//!
//! See decision **D1**. Numbers tagged `TUNABLE`. Phase 3 misprediction
//! logging is the right place to retune.

use super::annotation::parse_python_comment;
use super::{CommandApi, CommandArgKind};
use crate::dual_branch::{
    AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind, ResolutionKind,
    ResolutionSignal,
};
use crate::models::Severity;

// ─────────────────────────────────────────────────────────────────────────────
// Tunable weights
// ─────────────────────────────────────────────────────────────────────────────

// TUNABLE: see Phase 3 misprediction logging.
//
// Sign convention: positive leans Benign, negative leans RealBug.
//
// The magnitudes are calibrated so that:
//   * `subprocess.run(["git", "status"])` (all-literals list) sums to
//     `+0.50` → strongly Benign.
//   * `subprocess.run(f"echo {name}", shell=True)` where `name` is a
//     function param sums to `-0.40 + -0.30 = -0.70` → strongly RealBug.
//     NOTE(review): with the constants as currently written, `predict`
//     scores a parameter first argument with `W_ARGV0_IS_PARAMETER`
//     (`-0.50`), so this example actually sums to
//     `-0.40 + -0.50 = -0.90`. Either the `-0.30` figure here or the
//     constant is stale — confirm against decision D1.
//   * `subprocess.run(["xdg-open", url])` with literal argv[0] sums to
//     `+0.30` → Benign with alternative `RealBug / Low`.
// See the decisions doc D1 worked examples for the full pinning.

/// `shell=True` kwarg is present. Bandit B602/B605 canonical RCE smell.
const W_KW_SHELL_TRUE: f32 = -0.40;

/// `argv[0]` in a list-form call is a function parameter (the
/// attacker chooses which binary runs).
///
/// `predict` also applies this same weight to a string-form first
/// argument that is a bare function parameter, but only when no
/// `argv0_origin` was recorded for the call.
const W_ARGV0_IS_PARAMETER: f32 = -0.50;

/// First positional argument originates from a request object
/// (`request.GET[...]`, `flask.request.args`, etc.).
const W_FIRST_ARG_REQUEST_SOURCE: f32 = -0.50;

/// Enclosing function name looks like a request handler (`getX`,
/// `postX`, `handler`, `route`, `endpoint`, `view`, `controller`,
/// `middleware`, `request`, `response`).
///
/// Only applied when the name did not already match the test-function
/// lexicon; `predict` checks the test heuristic first.
const W_ENCLOSING_HANDLER: f32 = -0.30;

/// Argv list is all static literals (`subprocess.run(["git", "status"])`).
/// The strongest single Benign signal.
const W_ARGV_LIST_ALL_LITERALS: f32 = 0.50;

/// `argv[0]` is a string literal (with variable later args). Fixed
/// argv[0] caps damage to argument injection (CWE-88) under
/// `shell=False`.
///
/// Not added on top of [`W_ARGV_LIST_ALL_LITERALS`]: when the whole
/// list is literal, `predict` skips this weight to avoid double-counting.
const W_ARGV0_IS_LITERAL: f32 = 0.30;

/// First positional argument sourced from a config module
/// (`os.environ.get(...)`, `settings.X`, `config.X`).
const W_FIRST_ARG_CONFIG_SOURCE: f32 = 0.30;

/// Enclosing function name looks like a test/fixture.
const W_ENCLOSING_TEST_FUNCTION: f32 = 0.15;

// ─────────────────────────────────────────────────────────────────────────────
// Lexicons used by source-classification helpers
// ─────────────────────────────────────────────────────────────────────────────

/// Substrings that identify a request object. Mirrors 2b.
///
/// [`matches_request_object`] lowercases the expression text and tests
/// whether it *contains* any of these entries anywhere (a substring
/// match, not a prefix match), so every entry must itself be lowercase.
const REQUEST_OBJECT_SUBSTRINGS: &[&str] = &[
    "request.",
    "req.",
    "flask.request",
    "event.",
    "self.request",
];

/// Substrings that identify a config/env source. Mirrors 2b.
///
/// Matched by [`matches_config_object`] as a case-insensitive substring
/// test (the input is lowercased first), so entries must be lowercase.
const CONFIG_OBJECT_SUBSTRINGS: &[&str] = &[
    "settings.",
    "config.",
    "os.environ",
    "os.path.expanduser",
    "self.config",
    "self.settings",
];

/// Substrings that identify test code.
///
/// Matched against the lowercased enclosing-function name with a plain
/// substring test, so entries must be lowercase.
const TEST_FUNCTION_SUBSTRINGS: &[&str] = &["test_", "_test", "fixture", "setup", "teardown"];

/// Substrings that identify a request-handler function (case-insensitive
/// substring match). Mirror of the existing `HANDLER_VERB_RE` heuristic
/// at `mod.rs:863` minus the regex special-casing of HTTP-verb prefixes
/// (the verb-prefix case lives in [`matches_handler_function`]).
const HANDLER_FUNCTION_SUBSTRINGS: &[&str] = &[
    "handler",
    "route",
    "endpoint",
    "view",
    "controller",
    "middleware",
    "request",
    "response",
];

// ─────────────────────────────────────────────────────────────────────────────
// Evidence
// ─────────────────────────────────────────────────────────────────────────────

/// First positional arg's origin (when the arg is a bare identifier or
/// recognizable attribute chain). Mirrors 2b's `FirstArgOrigin`.
///
/// Each variant maps to at most one weighted signal in [`predict`];
/// see the per-variant notes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) enum FirstArgOrigin {
    /// Inline string literal. Carries no weight in [`predict`]; the
    /// literal case is reflected through the severity table instead.
    Literal,
    /// Variable bound from a config module / env. Scored with
    /// `W_FIRST_ARG_CONFIG_SOURCE` (leans Benign).
    ConfigSource,
    /// Subscript or attribute on a request object. Scored with
    /// `W_FIRST_ARG_REQUEST_SOURCE` (leans RealBug).
    RequestSource,
    /// A bare function parameter (possibly user-controlled). Scored
    /// with `W_ARGV0_IS_PARAMETER`, but only when the evidence has no
    /// `argv0_origin` (the list-form case is scored there instead).
    Parameter { name: String },
    /// Anything else. No signal.
    Unknown,
}

/// Origin classification of `argv[0]` in a list-form call. Distinct
/// from `FirstArgOrigin` because the list-form has different semantics
/// (the first arg is the **list**; argv[0] is the first ELEMENT of
/// that list). Tracked separately so the predictor can emit
/// `argv0_is_parameter` (`-0.50`) without also firing the unrelated
/// `first_arg_origin = Parameter` signal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) enum Argv0Origin {
    /// String literal — `subprocess.run(["git", ...])`. Scored with
    /// `W_ARGV0_IS_LITERAL` unless the evidence also reports
    /// `argv_list_all_literals`, in which case it is skipped to avoid
    /// double-counting.
    Literal,
    /// Function parameter — `subprocess.run([user_bin, ...])`. Scored
    /// with `W_ARGV0_IS_PARAMETER`.
    Parameter { name: String },
    /// Anything else (`["sh", "-c", ...]` is handled by the shell-c
    /// classification before this point, so it doesn't show up here).
    /// Carries no weight.
    Other,
}

/// Structured evidence extracted from a command-exec call site, ready
/// for scoring. Field semantics mirror Phase 2a/2b's `Evidence` shape.
///
/// `Default` yields an evidence value with no signals set (all flags
/// `false`, all options `None`).
#[derive(Debug, Clone, Default, PartialEq)]
pub(super) struct Evidence {
    /// Name of the enclosing function, if any. [`predict`] tests it
    /// against the test-function heuristic first and the
    /// request-handler heuristic second.
    pub enclosing_function: Option<String>,

    /// Name of the enclosing class, if any (informational; no weight).
    pub enclosing_class: Option<String>,

    /// `shell=True` kwarg present on the call.
    pub kw_shell_true: bool,

    /// First positional argument's origin (for string-form calls like
    /// `os.system(cmd)` or `subprocess.run(cmd, shell=True)`).
    pub first_arg_origin: Option<FirstArgOrigin>,

    /// `argv[0]` origin (for list-form calls like
    /// `subprocess.run(["bin", "arg"])`). `None` when the call isn't
    /// list-form.
    pub argv0_origin: Option<Argv0Origin>,

    /// True if the call is list-form AND every element is a static
    /// literal. The strongest Benign signal.
    pub argv_list_all_literals: bool,

    /// `Some(reason)` if a `# repotoire: command-static[<reason>]`
    /// annotation appears on the call line. **Collapsing**. Checked
    /// before `command_user_controlled_annotation` in [`predict`], so
    /// it wins if both are set.
    pub command_static_annotation: Option<String>,

    /// `Some(source)` if a `# repotoire: command-user-controlled[<source>]`
    /// annotation appears on the call line. **Collapsing**.
    pub command_user_controlled_annotation: Option<String>,
}

impl Evidence {
    /// Test-only shorthand for an [`Evidence`] carrying no signals:
    /// every field is left at its `Default` value.
    #[cfg(test)]
    pub(super) fn empty() -> Self {
        Default::default()
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Prediction
// ─────────────────────────────────────────────────────────────────────────────

/// Result of scoring one command-exec call site: the chosen branch,
/// the branch that was not chosen, and the evidence trail behind the
/// decision. Produced by [`predict`].
#[derive(Debug, Clone)]
pub(super) struct Prediction {
    /// The branch the predictor committed to (`RealBug` or `Benign`).
    pub predicted: BranchLabel,
    /// The opposite branch, carrying its own severity, title,
    /// description and suggested fix.
    pub alternative_branch: AlternativeBranch,
    /// Severity for `predicted`: `Severity::Info` when Benign, the
    /// `CommandApi::severity_for` table value when RealBug.
    pub predicted_severity: Severity,
    /// The signals that contributed to the decision, in the order they
    /// were evaluated. A collapsed prediction carries exactly one.
    pub reasons: Vec<PredictionReason>,
    /// Resolution signals attached to the prediction. Empty on the
    /// weighted-scoring path; a collapsed prediction carries the
    /// annotation that forced the branch.
    pub resolutions: Vec<ResolutionSignal>,
}

// ─────────────────────────────────────────────────────────────────────────────
// Scorer
// ─────────────────────────────────────────────────────────────────────────────

/// Build a [`Prediction`] from extracted [`Evidence`] and the call's
/// `(api, arg_kind, literal_text)` tuple.
///
/// # Algorithm
///
/// 1. **Collapsing signals first.** If `command_static_annotation` or
///    `command_user_controlled_annotation` is set, commit to the
///    corresponding branch with confidence 1.0 and skip weighted scoring.
/// 2. **Weighted scoring.** Sum weights for each present signal.
/// 3. **Tiebreak**: sum exactly 0.0 → predict RealBug. Conservative
///    default for security findings.
///
/// # Severity mapping
///
/// - Predicted RealBug → `CommandApi::severity_for(api, arg_kind, literal_text)`.
/// - Predicted Benign → `Severity::Info`.
pub(super) fn predict(
    evidence: &Evidence,
    api: CommandApi,
    arg_kind: CommandArgKind,
    literal_text: Option<&str>,
) -> Prediction {
    let api_label = api.callee_label();

    // ── Step 1: collapsing annotations. ──
    if let Some(reason) = &evidence.command_static_annotation {
        return collapse(
            BranchLabel::Benign,
            api,
            arg_kind,
            literal_text,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: command-static[{reason}]"),
                },
                description: format!(
                    "`command-static[{reason}]` annotation declares this \
                     command-exec call as protected by caller-side \
                     validation; the finding collapses to Info."
                ),
                example: Some(format!(
                    "{api_label}(...)  # repotoire: command-static[{reason}]"
                )),
                collapses_to: BranchLabel::Benign,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("command-static[{reason}] annotation"),
                },
                weight: 1.0,
                note: format!(
                    "Annotated as caller-validated ({reason}); not a \
                     command-injection risk."
                ),
            },
        );
    }
    if let Some(source) = &evidence.command_user_controlled_annotation {
        return collapse(
            BranchLabel::RealBug,
            api,
            arg_kind,
            literal_text,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: command-user-controlled[{source}]"),
                },
                description: format!(
                    "`command-user-controlled[{source}]` annotation \
                     declares the command argument as attacker-controlled; \
                     the finding stays at the existing severity."
                ),
                example: Some(format!(
                    "{api_label}(...)  # repotoire: command-user-controlled[{source}]"
                )),
                collapses_to: BranchLabel::RealBug,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("command-user-controlled[{source}] annotation"),
                },
                weight: -1.0,
                note: format!("Annotated as user-controlled (source: {source})."),
            },
        );
    }

    // ── Step 2: weighted scoring. ──
    let mut sum: f32 = 0.0;
    let mut reasons: Vec<PredictionReason> = Vec::new();

    if evidence.kw_shell_true {
        sum += W_KW_SHELL_TRUE;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::KeywordArgument {
                name: "shell".to_string(),
                value: "True".to_string(),
            },
            weight: W_KW_SHELL_TRUE,
            note: "`shell=True` makes the call interpret its argument \
                   through `/bin/sh`; canonical RCE smell."
                .to_string(),
        });
    }

    if evidence.argv_list_all_literals {
        sum += W_ARGV_LIST_ALL_LITERALS;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "argv list is all static literals".to_string(),
            },
            weight: W_ARGV_LIST_ALL_LITERALS,
            note: "Every element of the argv list is a string literal; \
                   neither the binary nor any argument can be attacker-\
                   chosen."
                .to_string(),
        });
    }

    if let Some(origin) = &evidence.argv0_origin {
        match origin {
            Argv0Origin::Literal if !evidence.argv_list_all_literals => {
                sum += W_ARGV0_IS_LITERAL;
                reasons.push(PredictionReason {
                    kind: PredictionReasonKind::StructuralPattern {
                        description: "argv[0] is a string literal".to_string(),
                    },
                    weight: W_ARGV0_IS_LITERAL,
                    note: "Fixed argv[0] caps damage to argument injection \
                           (CWE-88) under `shell=False`; the executed \
                           binary cannot be attacker-chosen."
                        .to_string(),
                });
            }
            Argv0Origin::Literal => {
                // Already captured by `argv_list_all_literals`; skip to
                // avoid double-counting.
            }
            Argv0Origin::Parameter { name } => {
                sum += W_ARGV0_IS_PARAMETER;
                reasons.push(PredictionReason {
                    kind: PredictionReasonKind::FirstArgIdentifier { name: name.clone() },
                    weight: W_ARGV0_IS_PARAMETER,
                    note: format!(
                        "argv[0] is `{name}`, a function parameter; the \
                         attacker chooses which binary runs."
                    ),
                });
            }
            Argv0Origin::Other => {
                // No signal.
            }
        }
    }

    if let Some(origin) = &evidence.first_arg_origin {
        match origin {
            FirstArgOrigin::Literal => {
                // No positive weight for string-form literal first arg;
                // the existing `severity_for(api, StaticLiteral)` already
                // returns `Low` for this shape, which is the right
                // alternative-branch severity. Adding a Benign weight
                // here would double-count.
            }
            FirstArgOrigin::ConfigSource => {
                sum += W_FIRST_ARG_CONFIG_SOURCE;
                reasons.push(PredictionReason {
                    kind: PredictionReasonKind::StructuralPattern {
                        description: "first arg sourced from config module".to_string(),
                    },
                    weight: W_FIRST_ARG_CONFIG_SOURCE,
                    note: "First argument originates from a config/env \
                           source (`os.environ`, `settings`, `config`); \
                           the project owns this value."
                        .to_string(),
                });
            }
            FirstArgOrigin::RequestSource => {
                sum += W_FIRST_ARG_REQUEST_SOURCE;
                reasons.push(PredictionReason {
                    kind: PredictionReasonKind::StructuralPattern {
                        description: "first arg from request object".to_string(),
                    },
                    weight: W_FIRST_ARG_REQUEST_SOURCE,
                    note: "First argument originates from a request object \
                           (`request.GET`, `flask.request.args`, ...); \
                           attacker-controlled."
                        .to_string(),
                });
            }
            FirstArgOrigin::Parameter { name } => {
                // Only count when there's no argv0 signal already (the
                // list-form param case is captured by `argv0_origin`).
                if evidence.argv0_origin.is_none() {
                    sum += W_ARGV0_IS_PARAMETER;
                    reasons.push(PredictionReason {
                        kind: PredictionReasonKind::FirstArgIdentifier { name: name.clone() },
                        weight: W_ARGV0_IS_PARAMETER,
                        note: format!(
                            "First argument is `{name}`, a function \
                             parameter; possibly user-controlled."
                        ),
                    });
                }
            }
            FirstArgOrigin::Unknown => {
                // No signal.
            }
        }
    }

    if let Some(fn_name) = &evidence.enclosing_function {
        if matches_test_function(fn_name) {
            sum += W_ENCLOSING_TEST_FUNCTION;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "function".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_TEST_FUNCTION,
                note: format!(
                    "Enclosing function `{fn_name}` looks like a \
                     test/fixture; test code rarely the actionable \
                     security target."
                ),
            });
        } else if matches_handler_function(fn_name) {
            sum += W_ENCLOSING_HANDLER;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "request_handler".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_HANDLER,
                note: format!(
                    "Enclosing function `{fn_name}` looks like a request \
                     handler (`handler`/`route`/`endpoint`/`view`/\
                     `controller`/`middleware`/HTTP-verb-prefix); higher \
                     prior on attacker-reachable code."
                ),
            });
        }
    }

    // ── Step 3: tiebreak + severity mapping. ──
    let predicted = if sum > 0.0 {
        BranchLabel::Benign
    } else {
        // Strict 0.0 tiebreak: lean RealBug. Conservative default
        // matching Phase 2a/2b/2c behavior for security findings.
        BranchLabel::RealBug
    };

    build_prediction(predicted, api, arg_kind, literal_text, reasons, Vec::new())
}

// ─────────────────────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────────────

/// Returns `true` when `text`, compared case-insensitively, contains any
/// entry of `REQUEST_OBJECT_SUBSTRINGS` anywhere in the string.
pub(super) fn matches_request_object(text: &str) -> bool {
    let haystack = text.to_lowercase();
    for needle in REQUEST_OBJECT_SUBSTRINGS {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}

/// Returns `true` when `text`, compared case-insensitively, contains any
/// entry of `CONFIG_OBJECT_SUBSTRINGS` anywhere in the string.
pub(super) fn matches_config_object(text: &str) -> bool {
    let haystack = text.to_lowercase();
    for needle in CONFIG_OBJECT_SUBSTRINGS {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}

/// Returns `true` when the lowercased function `name` contains any entry
/// of `TEST_FUNCTION_SUBSTRINGS` (plain substring test).
fn matches_test_function(name: &str) -> bool {
    let folded = name.to_lowercase();
    for marker in TEST_FUNCTION_SUBSTRINGS {
        if folded.contains(*marker) {
            return true;
        }
    }
    false
}

/// Decides whether `name` looks like a request-handler function.
///
/// Two independent checks, either of which is sufficient:
///
/// 1. The lowercased name contains an entry of
///    `HANDLER_FUNCTION_SUBSTRINGS`.
/// 2. The name, in its original casing, starts with an HTTP verb
///    immediately followed by an ASCII uppercase letter (`getX`,
///    `postX`, ...). This approximates the regex
///    `^(get|post|put|delete|patch|head|options)[A-Z]` from the
///    `HANDLER_VERB_RE` heuristic at `command_injection/mod.rs:863`
///    without depending on `regex`.
fn matches_handler_function(name: &str) -> bool {
    const HTTP_VERBS: [&str; 7] = ["get", "post", "put", "delete", "patch", "head", "options"];

    let folded = name.to_lowercase();
    let in_lexicon = HANDLER_FUNCTION_SUBSTRINGS
        .iter()
        .any(|needle| folded.contains(*needle));

    // The verb check deliberately runs on the un-lowercased name: the
    // uppercase letter after the verb is what marks camelCase.
    in_lexicon
        || HTTP_VERBS.iter().any(|verb| {
            matches!(
                name.strip_prefix(*verb).and_then(|rest| rest.chars().next()),
                Some(first) if first.is_ascii_uppercase()
            )
        })
}

/// Builds the [`Prediction`] for an annotation-forced branch.
///
/// The result commits to `label` and carries exactly one reason
/// (`reason`) and one resolution signal (`resolution`); severity and the
/// alternative branch are filled in by [`build_prediction`].
fn collapse(
    label: BranchLabel,
    api: CommandApi,
    arg_kind: CommandArgKind,
    literal_text: Option<&str>,
    resolution: ResolutionSignal,
    reason: PredictionReason,
) -> Prediction {
    let reasons = vec![reason];
    let resolutions = vec![resolution];
    build_prediction(label, api, arg_kind, literal_text, reasons, resolutions)
}

/// Assembles a [`Prediction`] for the already-decided `predicted` label.
///
/// The alternative branch is always the opposite label, with its own
/// severity, title, description and suggested fix derived from that
/// label. `reasons` and `resolutions` are passed through untouched.
fn build_prediction(
    predicted: BranchLabel,
    api: CommandApi,
    arg_kind: CommandArgKind,
    literal_text: Option<&str>,
    reasons: Vec<PredictionReason>,
    resolutions: Vec<ResolutionSignal>,
) -> Prediction {
    let api_label = api.callee_label();
    let other = predicted.opposite();

    Prediction {
        predicted,
        alternative_branch: AlternativeBranch {
            label: other,
            severity: severity_for_branch(other, api, arg_kind, literal_text),
            title: title_for_branch(other, api_label),
            description: description_for_branch(other, api_label),
            suggested_fix: suggested_fix_for_branch(other, api_label),
        },
        predicted_severity: severity_for_branch(predicted, api, arg_kind, literal_text),
        reasons,
        resolutions,
    }
}

/// Maps a branch label to the severity it is reported at.
///
/// This is the D3 deviation from 2a/2b: a `Benign` branch is always
/// `Severity::Info`, while a `RealBug` branch defers to the existing
/// `severity_for(arg_kind, literal_text)` table on `api` rather than
/// using one flat value.
fn severity_for_branch(
    label: BranchLabel,
    api: CommandApi,
    arg_kind: CommandArgKind,
    literal_text: Option<&str>,
) -> Severity {
    match label {
        BranchLabel::Benign => Severity::Info,
        BranchLabel::RealBug => api.severity_for(arg_kind, literal_text),
    }
}

/// Produces the one-line finding title for `label`, naming the API the
/// call went through (`api_label`).
fn title_for_branch(label: BranchLabel, api_label: &str) -> String {
    // Each title is "<lead-in><api_label><tail>"; only the Benign form
    // has a tail.
    let (lead_in, tail) = match label {
        BranchLabel::Benign => ("Internal command-exec call via ", " (informational)"),
        BranchLabel::RealBug => ("Potential command injection via ", ""),
    };
    format!("{lead_in}{api_label}{tail}")
}

/// Produces the long-form finding description for `label`, with
/// `api_label` interpolated as the API the argument is passed to.
fn description_for_branch(label: BranchLabel, api_label: &str) -> String {
    match label {
        BranchLabel::Benign => {
            format!(
                "The argument to `{api_label}` appears to be a literal or \
                 config-derived value. The call site is carried as Info; the \
                 RealBug interpretation is preserved in `alternative_branch` \
                 in case the predictor is wrong."
            )
        }
        BranchLabel::RealBug => {
            format!(
                "The argument to `{api_label}` appears attacker-influenceable. \
                 OS-command-execution APIs run their argument as a shell or \
                 argv list. When that argument is anything other than a \
                 constant the program author controls at write time, \
                 attackers who can influence the value get arbitrary command \
                 execution."
            )
        }
    }
}

/// Produces the remediation hint for `label`.
///
/// Both branches currently have a hint, so the result is always `Some`.
/// `_api_label` is accepted for signature parity with the other
/// per-branch text helpers but is not used.
fn suggested_fix_for_branch(label: BranchLabel, _api_label: &str) -> Option<String> {
    let advice: &str = match label {
        BranchLabel::Benign => {
            "If this is intentional internal use, annotate \
             `# repotoire: command-static[<reason>]` to collapse the \
             finding to Info definitively."
        }
        BranchLabel::RealBug => {
            "Use the list form with a fixed argv[0]: \
             `subprocess.run([\"cmd\", arg1, arg2], shell=False)`. \
             Validate any user-controlled later argv elements against an \
             allowlist. Avoid `shell=True` and `os.system` / `os.popen` \
             entirely."
        }
    };
    Some(advice.to_string())
}

// ─────────────────────────────────────────────────────────────────────────────
// Annotation lookup helpers (called by evidence extraction)
// ─────────────────────────────────────────────────────────────────────────────

/// Extracts the reason from a `# repotoire: command-static[<reason>]`
/// annotation on `line`.
///
/// Returns `None` when `line` has no parseable annotation or the
/// annotation is of a different kind. When the annotation has no
/// arguments the reason is `"unspecified"`; otherwise it is the first
/// argument.
pub(super) fn extract_command_static_reason(line: &str) -> Option<String> {
    parse_python_comment(line)
        .filter(|ann| ann.kind == "command-static")
        .map(|ann| {
            ann.args
                .first()
                .cloned()
                .unwrap_or_else(|| "unspecified".to_string())
        })
}

/// Extracts the source from a
/// `# repotoire: command-user-controlled[<source>]` annotation on `line`.
///
/// Returns `None` when `line` has no parseable annotation or the
/// annotation is of a different kind. When the annotation has no
/// arguments the source is `"unspecified"`; otherwise it is the first
/// argument.
pub(super) fn extract_command_user_controlled_source(line: &str) -> Option<String> {
    parse_python_comment(line)
        .filter(|ann| ann.kind == "command-user-controlled")
        .map(|ann| {
            ann.args
                .first()
                .cloned()
                .unwrap_or_else(|| "unspecified".to_string())
        })
}

// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    // Unit tests for the predictor. Grouped as: the worked examples from
    // decision D1, the two collapsing annotations, the empty-evidence
    // tiebreak, weight sign pins, lexicon matchers, and the annotation
    // extract helpers.
    use super::*;

    // ─── Worked example 1 (decisions D1): hardcoded literal list. ───
    #[test]
    fn argv_list_all_literals_predicts_benign_info() {
        let evidence = Evidence {
            argv_list_all_literals: true,
            argv0_origin: Some(Argv0Origin::Literal),
            ..Default::default()
        };
        let p = predict(
            &evidence,
            CommandApi::PySubprocessNoShell,
            CommandArgKind::StaticList,
            None,
        );
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        // Alternative: RealBug at severity_for(PySubprocessNoShell, StaticList) = Low.
        assert_eq!(p.alternative_branch.label, BranchLabel::RealBug);
        assert_eq!(p.alternative_branch.severity, Severity::Low);
        // Only one signal fires (no double-count of argv0_is_literal).
        assert_eq!(p.reasons.len(), 1);
    }

    // ─── Worked example 2 (decisions D1): shell=True with interpolation. ───
    #[test]
    fn shell_true_with_param_interpolation_predicts_realbug_critical() {
        let evidence = Evidence {
            kw_shell_true: true,
            first_arg_origin: Some(FirstArgOrigin::Parameter {
                name: "name".to_string(),
            }),
            ..Default::default()
        };
        let p = predict(
            &evidence,
            CommandApi::PySubprocessShell,
            CommandArgKind::Interpolated,
            None,
        );
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // severity_for(PySubprocessShell, Interpolated) = Critical.
        assert_eq!(p.predicted_severity, Severity::Critical);
        let total: f32 = p.reasons.iter().map(|r| r.weight).sum();
        // Derive the expected sum from the constants so the failure message
        // stays accurate when the TUNABLE weights are retuned.
        let expected = W_KW_SHELL_TRUE + W_ARGV0_IS_PARAMETER;
        assert!(
            (total - expected).abs() < 1e-6,
            "expected {expected}, got {total}"
        );
    }

    // ─── Worked example 3 (decisions D1): shell=True literal command. ───
    //
    // `subprocess.run(f"git status", shell=True)` — literal text, no
    // interpolation, but `shell=True`. Sum = -0.40, predicted RealBug,
    // but the existing severity table says (PySubprocessShell,
    // StaticLiteral, no metachars) → Low. So this surfaces as
    // RealBug / Low, which the flag-on path keeps (drop-Low disabled).
    #[test]
    fn shell_true_static_literal_predicts_realbug_low() {
        let evidence = Evidence {
            kw_shell_true: true,
            ..Default::default()
        };
        let p = predict(
            &evidence,
            CommandApi::PySubprocessShell,
            CommandArgKind::StaticLiteral,
            Some("git status"),
        );
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Low);
        assert_eq!(p.alternative_branch.label, BranchLabel::Benign);
        assert_eq!(p.alternative_branch.severity, Severity::Info);
    }

    // ─── Worked example 4 (decisions D1): MixedListLiteralArgv0. ───
    //
    // `subprocess.run(["xdg-open", url])` where `url` is a param.
    // The argv0_is_literal signal fires (+0.30), no negative signal.
    // Predicted Benign / Info; alternative: existing severity table
    // gives (PySubprocessNoShell, MixedListLiteralArgv0) = Low.
    #[test]
    fn mixed_list_literal_argv0_predicts_benign_info() {
        let evidence = Evidence {
            argv0_origin: Some(Argv0Origin::Literal),
            // not all-literals — variable later arg
            argv_list_all_literals: false,
            ..Default::default()
        };
        let p = predict(
            &evidence,
            CommandApi::PySubprocessNoShell,
            CommandArgKind::MixedListLiteralArgv0,
            None,
        );
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        // Pin the alternative's label as well as its severity, matching
        // worked examples 1 and 3.
        assert_eq!(p.alternative_branch.label, BranchLabel::RealBug);
        assert_eq!(p.alternative_branch.severity, Severity::Low);
    }

    // ─── Collapsing annotations ───
    #[test]
    fn command_static_annotation_collapses_to_benign() {
        let evidence = Evidence {
            // Other signals say RealBug, but the annotation overrides.
            kw_shell_true: true,
            first_arg_origin: Some(FirstArgOrigin::RequestSource),
            command_static_annotation: Some("allowlisted-by-caller".to_string()),
            ..Default::default()
        };
        let p = predict(
            &evidence,
            CommandApi::PySubprocessShell,
            CommandArgKind::Interpolated,
            None,
        );
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::SourceAnnotation { .. }
        ));
    }

    #[test]
    fn command_user_controlled_annotation_collapses_to_realbug() {
        let evidence = Evidence {
            // Other signals say Benign, but the annotation overrides.
            argv_list_all_literals: true,
            argv0_origin: Some(Argv0Origin::Literal),
            command_user_controlled_annotation: Some("env-var".to_string()),
            ..Default::default()
        };
        let p = predict(
            &evidence,
            CommandApi::PySubprocessNoShell,
            CommandArgKind::StaticList,
            None,
        );
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // severity_for(PySubprocessNoShell, StaticList) = Low under
        // the existing table — the annotation forces the branch but
        // the severity comes from the 2D table.
        assert_eq!(p.predicted_severity, Severity::Low);
    }

    // ─── Tiebreak ───
    #[test]
    fn empty_evidence_tiebreaks_realbug() {
        let p = predict(
            &Evidence::empty(),
            CommandApi::PyOsSystem,
            CommandArgKind::Unknown,
            None,
        );
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // severity_for(PyOsSystem [shell], Unknown) = High.
        assert_eq!(p.predicted_severity, Severity::High);
    }

    // ─── Sign convention ───
    //
    // These tests pin the sign convention. Clippy complains about
    // `assert!` on constant expressions, but the whole point is that a
    // future edit changing a sign would silently flip the predicted
    // branch — these assertions catch that at test time.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn realbug_signal_weights_are_negative() {
        assert!(W_ENCLOSING_HANDLER < 0.0);
        assert!(W_KW_SHELL_TRUE < 0.0);
        assert!(W_FIRST_ARG_REQUEST_SOURCE < 0.0);
        assert!(W_ARGV0_IS_PARAMETER < 0.0);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn benign_signal_weights_are_positive() {
        assert!(W_ARGV_LIST_ALL_LITERALS > 0.0);
        assert!(W_ARGV0_IS_LITERAL > 0.0);
        assert!(W_FIRST_ARG_CONFIG_SOURCE > 0.0);
        assert!(W_ENCLOSING_TEST_FUNCTION > 0.0);
    }

    // ─── Lexicon checks ───
    #[test]
    fn handler_lexicon_matches_camel_verb_prefix() {
        assert!(matches_handler_function("getUserById"));
        assert!(matches_handler_function("postOrder"));
        assert!(matches_handler_function("deleteAccount"));
        assert!(!matches_handler_function("get_user_by_id"));
        assert!(!matches_handler_function("getfoo")); // no uppercase after
    }

    #[test]
    fn handler_lexicon_matches_substring() {
        assert!(matches_handler_function("user_route"));
        assert!(matches_handler_function("loginHandler"));
        assert!(matches_handler_function("upload_endpoint"));
        assert!(!matches_handler_function("calculate_total"));
    }

    #[test]
    fn request_object_lexicon() {
        assert!(matches_request_object("request.GET[\"foo\"]"));
        assert!(matches_request_object("req.body.cmd"));
        assert!(matches_request_object("flask.request.args"));
        assert!(!matches_request_object("settings.BASE_DIR"));
    }

    #[test]
    fn config_object_lexicon() {
        assert!(matches_config_object("os.environ.get(\"X\")"));
        assert!(matches_config_object("settings.BASE_DIR"));
        assert!(matches_config_object("config.DEBUG"));
        assert!(!matches_config_object("request.args.get(\"x\")"));
    }

    #[test]
    fn test_function_lexicon() {
        assert!(matches_test_function("test_subprocess_run"));
        assert!(matches_test_function("subprocess_test"));
        assert!(matches_test_function("setup_fixture"));
        assert!(!matches_test_function("run_command"));
    }

    // ─── Extract helpers ───
    #[test]
    fn extract_command_static_with_reason() {
        assert_eq!(
            extract_command_static_reason(
                "subprocess.run(...)  # repotoire: command-static[validated]"
            ),
            Some("validated".to_string())
        );
    }

    #[test]
    fn extract_command_static_without_reason() {
        assert_eq!(
            extract_command_static_reason("subprocess.run(...)  # repotoire: command-static"),
            Some("unspecified".to_string())
        );
    }

    #[test]
    fn extract_command_user_controlled_with_source() {
        assert_eq!(
            extract_command_user_controlled_source(
                "subprocess.run(...)  # repotoire: command-user-controlled[GET-request]"
            ),
            Some("GET-request".to_string())
        );
    }

    // Counterpart of `extract_command_static_without_reason`: a bare
    // annotation falls back to the "unspecified" source.
    #[test]
    fn extract_command_user_controlled_without_source() {
        assert_eq!(
            extract_command_user_controlled_source(
                "subprocess.run(...)  # repotoire: command-user-controlled"
            ),
            Some("unspecified".to_string())
        );
    }

    #[test]
    fn extract_command_static_ignores_other_kinds() {
        // The 2b annotation kind must NOT match the 2d extractor.
        assert_eq!(
            extract_command_static_reason("os.path.join(...)  # repotoire: internal-path[ok]"),
            None
        );
    }

    #[test]
    fn extract_command_user_controlled_ignores_other_kinds() {
        assert_eq!(
            extract_command_user_controlled_source(
                "os.path.join(...)  # repotoire: user-controlled[GET]"
            ),
            None
        );
    }
}