// repotoire 0.8.2

//! Dual-branch predictor for Python insecure-deserialization call sites.
//!
//! Implements decisions D1 (weights, with bidirectional Step 1.5 collapse:
//! D1.a Safe-API Benign collapse + D1.b Unsafe-API RealBug collapse) and
//! D3 (severity) from
//! `docs/superpowers/specs/2026-05-09-dual-branch-phase2-insecure-deserialize-decisions.md`.
//!
//! # What this module does
//!
//! Given a Python deserialize call site (`pickle.loads(...)`,
//! `yaml.safe_load(...)`, `json.loads(...)`, `yaml.load(...)`,
//! `marshal.loads(...)`, etc.), produce a [`Prediction`] that:
//!
//! 1. Picks `RealBug` or `Benign` as the predicted branch.
//! 2. Carries the other branch as the alternative.
//! 3. Lists typed [`PredictionReason`]s the predictor used.
//! 4. Optionally lists [`ResolutionSignal`]s (collapsing or hint-grade).
//!
//! # Bidirectional Step 1.5 collapse — **the structural novelty of 2h**
//!
//! Phase 2h is the **first phase natively designed with bidirectional
//! Step 1.5 collapse**:
//!
//! - **D1.a — Safe-API Benign-direction collapse.** When the call site
//!   uses a Safe-by-construction API (`yaml.safe_load`, `json.loads`,
//!   `yaml.load` *with* explicit `Loader=SafeLoader`/`CSafeLoader`,
//!   ruamel.yaml's `YAML(typ='safe')`), commit to **Benign / Info**
//!   regardless of any additive signals. Same family as 2e's
//!   defusedxml and 2f's advocate.
//!
//! - **D1.b — Unsafe-API RealBug-direction collapse.** When the call
//!   site uses an Unsafe-by-construction API (`pickle.loads`,
//!   `pickle.load`, `marshal.loads`, `marshal.load`, `yaml.load`
//!   *without* explicit safe Loader, `cPickle.loads`), commit to
//!   **RealBug / Critical** regardless of any additive signals. Same
//!   family as 2g's `'none'` collapse.
//!
//! The dual-branch architecture has matured through three stages:
//! 1. **2e/2f**: Benign-only collapses (library identity → safe-by-construction).
//! 2. **2g**: RealBug-direction collapse retrofitted (`'none'` argument value).
//! 3. **2h** (this phase): bidirectional from the start, because the
//!    deserialization domain has decisive evidence on BOTH sides:
//!    Safe APIs guarantee correctness; Unsafe APIs guarantee
//!    incorrectness — both via API identity.
//!
//! # Sign convention
//!
//! `weight > 0` leans **Benign**; `weight < 0` leans **RealBug**.
//!
//! # Severity mapping (D3)
//!
//! - Predicted **RealBug** via the Unsafe-API collapse → `Severity::Critical`.
//! - Predicted **RealBug** otherwise → `Critical` (`sum <= -0.7`),
//!   `High` (`-0.7 < sum <= -0.4`), `Medium` (shallow negative or tiebreak).
//! - Predicted **Benign** (via Safe-API collapse OR weighted-positive) → `Severity::Info`.
//!
//! # Resolution signals (collapsing)
//!
//! Four collapsing signals (two annotations + two API-name collapses):
//!
//! - `# repotoire: deserialize-safe[<reason>]` → `Benign` (Info).
//! - `# repotoire: deserialize-vulnerable[<source>]` → `RealBug` (Critical:
//!   the collapse forces the sum to `-1.0`, which D3 buckets as Critical).
//! - Safe-by-construction API in `DeserializeApi::Safe` → `Benign` (Info). **D1.a**.
//! - Unsafe-by-construction API in `DeserializeApi::Unsafe` → `RealBug` (Critical). **D1.b**.
//!
//! # Why these weights
//!
//! See decision **D1** (with §6 D1 amendment for the bidirectional
//! collapse). Numbers tagged `TUNABLE`. Phase 3 misprediction logging
//! is the right place to retune.

use super::annotation::parse_python_comment;
use crate::dual_branch::{
    AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind, ResolutionKind,
    ResolutionSignal,
};
use crate::models::Severity;

// ─────────────────────────────────────────────────────────────────────────────
// DeserializeApi — the API-classification enum for Phase 2h
// ─────────────────────────────────────────────────────────────────────────────

/// Which Python deserialization API the call site uses, and what
/// safety contract the API provides.
///
/// The classification drives the Step 1.5 bidirectional collapses:
/// `Safe` → Benign collapse (D1.a); `Unsafe` → RealBug collapse (D1.b);
/// `Ambiguous` and `Unknown` → fall through to weighted scoring.
///
/// The type is `Copy`, so it is passed by value throughout this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum DeserializeApi {
    /// Safe-by-construction API. The library guarantees correctness
    /// regardless of input source. Examples: `yaml.safe_load`,
    /// `json.loads`, `json.load`, `yaml.load` *with* explicit
    /// `Loader=yaml.SafeLoader`/`CSafeLoader`, ruamel.yaml's
    /// `YAML(typ='safe').load`. Triggers D1.a Benign-direction collapse.
    Safe,
    /// Unsafe-by-construction API. The library affordances ARE the
    /// danger; correctness is impossible without a wrapper. Examples:
    /// `pickle.loads`, `pickle.load`, `cPickle.loads`, `marshal.loads`,
    /// `marshal.load`, `yaml.load` *without* explicit safe Loader.
    /// Triggers D1.b RealBug-direction collapse.
    Unsafe,
    /// API whose safety depends on additional context the v0 predictor
    /// cannot statically resolve. Examples: `yaml.load` with a dynamic
    /// (identifier-reference) `Loader=` value; `JSON.parse` in JS where
    /// prototype-pollution is a softer concern. Falls through to
    /// weighted scoring.
    Ambiguous,
    /// Deserialize-looking call whose library is not classified.
    /// Scored exactly like `Ambiguous` (neither collapse fires), but
    /// kept as a distinct variant so labels/descriptions can differ.
    /// This is the only variant for which `is_recognized` returns
    /// `false`, and it is the fallback `predict` uses when
    /// `Evidence::api` is `None`.
    Unknown,
}

impl DeserializeApi {
    /// Short, static display label for this API class, interpolated
    /// into finding titles and descriptions.
    pub(super) fn callee_label(self) -> &'static str {
        match self {
            Self::Safe => "safe-deserializer",
            Self::Unsafe => "unsafe-deserializer",
            Self::Ambiguous => "ambiguous-deserializer",
            Self::Unknown => "deserialize client",
        }
    }

    /// Whether the API belongs to a recognized Python deserialize
    /// family, i.e. is anything other than `Unknown`. Gates the
    /// Phase 2h dual-branch emission path: only recognized Python
    /// sites get the predictor-aware shape; everything else stays on
    /// the legacy regex scanner per decisions D5 #5.
    pub(super) fn is_recognized(self) -> bool {
        self != Self::Unknown
    }

    /// Whether this API triggers the D1.a Safe-direction Step 1.5
    /// collapse (only `Safe` does). Kept as a named predicate so the
    /// predictor reads symmetrically with `collapses_unsafe`.
    pub(super) fn collapses_safe(self) -> bool {
        self == Self::Safe
    }

    /// Whether this API triggers the D1.b Unsafe-direction Step 1.5
    /// collapse (only `Unsafe` does).
    pub(super) fn collapses_unsafe(self) -> bool {
        self == Self::Unsafe
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Tunable weights
// ─────────────────────────────────────────────────────────────────────────────

// TUNABLE: see Phase 3 misprediction logging.
//
// Sign convention: positive leans Benign, negative leans RealBug.
//
// Calibration target (per decisions doc D1 worked examples Case A-F):
//   A. `yaml.safe_load(request.data)` in Flask handler:
//      Safe-API collapse → Benign / Info. ✅
//      (additive-only would be -0.50 - 0.30 = -0.80 → RealBug Critical. WRONG.)
//   B. `pickle.loads(f.read())` on local file:
//      Unsafe-API collapse → RealBug / Critical. ✅
//      (additive-only would be +0.10 → Benign Info. WRONG.)
//   C. `json.loads(request.body)` in Flask handler:
//      Safe-API collapse → Benign / Info. ✅
//   D. `yaml.load(f, Loader=yaml.SafeLoader)`:
//      Evidence extractor reclassifies API to Safe → Benign / Info. ✅
//   E. `pickle.loads(unsign(blob))` (Django signed-payload pattern):
//      v0 limitation: cannot trace cross-statement; user annotates.
//   F. `eval(request.data)`: out of scope; EvalDetector handles. D5 #4.

/// Informational weight for the D1.a Safe-API Step 1.5 Benign-direction
/// collapse. Same magnitude as `W_API_ADVOCATE_CALL` in Phase 2f and
/// `W_API_DEFUSEDXML_CALL` in Phase 2e — collapse signals carry a
/// magnitude-1 weight on the `PredictionReason` so a reader can tell
/// "this was a Step 1.5 collapse" from the weight alone.
pub(super) const W_API_SAFE_COLLAPSE: f32 = 1.0;

/// Informational weight for the D1.b Unsafe-API Step 1.5 RealBug-
/// direction collapse. Same magnitude as `W_ALGORITHM_NONE_COLLAPSE` in
/// Phase 2g (the first RealBug-direction collapse in the series), with
/// a negative sign per the module's sign convention.
pub(super) const W_API_UNSAFE_COLLAPSE: f32 = -1.0;

/// User input flows into the deserialize call (request.data,
/// request.body, request.json, request.form, request.files, etc.) on
/// the call line or within ±10 lines. Negative weight — only fires
/// when the API is `Ambiguous` or `Unknown`; the Step 1.5 collapses
/// fire first for Safe / Unsafe APIs.
pub(super) const W_USER_INPUT_TO_DESERIALIZE: f32 = -0.50;

/// Enclosing function is a route handler (decorated with `@app.route`,
/// `@router.post`, `@view`, `@require_http_methods`, etc., OR named
/// like `*_handler` / `*_endpoint`). Negative — Ambiguous APIs in
/// reachable network paths are higher-prior.
pub(super) const W_ENCLOSING_ROUTE_HANDLER: f32 = -0.30;

/// Enclosing function name suggests a load / import / upload /
/// restore operation. Negative (soft) — these names suggest data
/// crossing trust boundaries.
pub(super) const W_ENCLOSING_UPLOAD_LIKE: f32 = -0.10;

/// The deserialize call's source is a local-disk file (`open(...).read()`
/// flowing into the call, or `open(...)` as a direct argument).
/// Positive (soft) — local config files are usually trusted. The
/// dangerous variant (`pickle.loads` on a local file) never reaches
/// this weight because the Unsafe-API collapse handles it first.
pub(super) const W_LOCAL_FILE_SOURCE: f32 = 0.10;

/// Enclosing function looks like a test fixture. Positive (mirrors 2a-2g).
pub(super) const W_ENCLOSING_TEST_FUNCTION: f32 = 0.15;

/// Enclosing function name contains one of the keywords in
/// `TRUST_BOUNDARY_NAME_SUBSTRINGS` (`_trusted`, `_admin`,
/// `_internal`, `_signed`). Positive (soft) — developer-authored
/// signal that the data has already been verified / comes from a
/// trusted side of the boundary.
pub(super) const W_TRUST_BOUNDARY_NAME: f32 = 0.10;

// ─────────────────────────────────────────────────────────────────────────────
// Lexicons used by source-classification helpers
// ─────────────────────────────────────────────────────────────────────────────

/// User-input identifier substrings. The evidence extractor checks
/// whether any of these appears within ±10 lines of the call site
/// (line-level heuristic, mirrors the legacy `has_nearby_user_input`).
///
/// Entries must be lowercase: `line_contains_user_input` lowercases
/// the line before doing a plain substring search.
pub(super) const USER_INPUT_NAME_SUBSTRINGS: &[&str] = &[
    "request.data",
    "request.body",
    "request.json",
    "request.form",
    "request.files",
    "request.get_json",
    "request.values",
    "request.args",
    "flask.request",
    "django.request",
    "self.request",
    "input(",
    "sys.argv",
];

/// Route-handler decorator patterns (line-level check on decorator
/// lines preceding the function definition).
///
/// Despite the name, `matches_route_handler_decorator` treats each
/// entry as a *prefix* of the whitespace-trimmed line, and the match
/// is case-sensitive.
pub(super) const ROUTE_HANDLER_DECORATOR_SUBSTRINGS: &[&str] = &[
    "@app.route",
    "@app.get",
    "@app.post",
    "@app.put",
    "@app.delete",
    "@router.get",
    "@router.post",
    "@router.put",
    "@router.delete",
    "@view",
    "@api_view",
    "@require_http_methods",
    "@csrf_exempt",
    "@login_required",
    "@blueprint.route",
];

/// Function-name substrings that suggest a route handler. Matched
/// case-insensitively (the name is lowercased first), so entries must
/// be lowercase.
pub(super) const ROUTE_HANDLER_NAME_SUBSTRINGS: &[&str] =
    &["_handler", "_endpoint", "_view", "_route"];

/// Function-name substrings suggesting load / import / upload /
/// restore operations. Matched case-insensitively against the
/// lowercased function name.
pub(super) const UPLOAD_LIKE_NAME_SUBSTRINGS: &[&str] = &[
    "upload", "import_", "_import", "load_", "_load", "restore", "ingest",
];

/// Function-name substrings suggesting a trust boundary has already
/// been crossed. Matched case-insensitively against the lowercased
/// function name.
pub(super) const TRUST_BOUNDARY_NAME_SUBSTRINGS: &[&str] =
    &["_trusted", "_admin", "_internal", "_signed"];

/// Substrings that identify test code. Mirrors 2a–2g. Plain substring
/// matching on the lowercased function name, so any name that merely
/// contains one of these fragments also counts.
const TEST_FUNCTION_SUBSTRINGS: &[&str] = &["test_", "_test", "fixture", "setup", "teardown"];

// ─────────────────────────────────────────────────────────────────────────────
// Evidence
// ─────────────────────────────────────────────────────────────────────────────

/// Structured evidence extracted from a Python deserialize call site.
///
/// Populated by `evidence::extract_python_evidence` (Commit 4) and
/// consumed by [`predict`]. `Default` yields "no evidence at all":
/// every `Option` is `None` and every flag is `false`.
#[derive(Debug, Clone, Default, PartialEq)]
pub(super) struct Evidence {
    /// Which deserialize API the call site uses, post-reclassification
    /// (so `yaml.load(..., Loader=SafeLoader)` is `Safe`, not `Ambiguous`).
    /// `None` is treated by [`predict`] as `DeserializeApi::Unknown`.
    pub api: Option<DeserializeApi>,

    /// The raw callee text (`pickle.loads`, `yaml.safe_load`, etc.).
    /// Used for the title/description; the predictor itself only
    /// reads `api`.
    pub callee_label: Option<String>,

    /// Name of the enclosing function, if any. Besides labelling
    /// scope-based reasons, [`predict`] also matches it against the
    /// test-function lexicon.
    pub enclosing_function: Option<String>,

    /// Name of the enclosing class, if any (informational; no weight).
    pub enclosing_class: Option<String>,

    /// File path string, used for diagnostics; no weight in this
    /// phase (auth-flow path heuristic is a JWT-specific signal).
    pub file_path: Option<String>,

    /// User-input identifier (e.g. `request.data`) appears within
    /// ±10 lines of the call site.
    pub user_input_nearby: bool,

    /// Enclosing function is decorated with a recognized route-handler
    /// decorator (e.g. `@app.route`) OR has a recognized handler name.
    pub enclosing_route_handler: bool,

    /// Enclosing function name suggests a load / import / upload /
    /// restore operation.
    pub enclosing_upload_like: bool,

    /// The deserialize call's source is a local-disk file (`open(...)`
    /// or `open(...).read()` argument).
    pub local_file_source: bool,

    /// Enclosing function name suggests a trust boundary has already
    /// been crossed (`_trusted`, `_admin`, `_internal`, `_signed`).
    pub trust_boundary_name: bool,

    /// `Some(reason)` if a `# repotoire: deserialize-safe[<reason>]`
    /// annotation appears on the call line. **Collapsing**; checked
    /// by [`predict`] before the vulnerable annotation.
    pub deserialize_safe_annotation: Option<String>,

    /// `Some(source)` if a `# repotoire: deserialize-vulnerable[<source>]`
    /// annotation appears on the call line. **Collapsing**.
    pub deserialize_vulnerable_annotation: Option<String>,
}

impl Evidence {
    /// Test-only convenience constructor: an `Evidence` with no API,
    /// no names, no annotations, and every signal flag cleared.
    #[cfg(test)]
    pub(super) fn empty() -> Self {
        Evidence::default()
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Prediction
// ─────────────────────────────────────────────────────────────────────────────

/// Outcome of [`predict`] for a single deserialize call site.
#[derive(Debug, Clone)]
pub(super) struct Prediction {
    /// The branch the predictor committed to (`RealBug` or `Benign`).
    pub predicted: BranchLabel,
    /// The opposite branch, carried with its own severity, title,
    /// description and suggested fix.
    pub alternative_branch: AlternativeBranch,
    /// Severity assigned to the predicted branch (D3 mapping).
    pub predicted_severity: Severity,
    /// Typed reasons behind the prediction: a single reason on a
    /// collapse path, one per fired signal on the weighted path.
    pub reasons: Vec<PredictionReason>,
    /// Resolution signals. Holds the collapsing signal on a collapse
    /// path; empty on the weighted-scoring path.
    pub resolutions: Vec<ResolutionSignal>,
}

// ─────────────────────────────────────────────────────────────────────────────
// Scorer
// ─────────────────────────────────────────────────────────────────────────────

/// Build a [`Prediction`] from extracted [`Evidence`].
///
/// # Algorithm
///
/// 1. **Collapsing annotations first.** If `deserialize_safe_annotation`
///    or `deserialize_vulnerable_annotation` is set, commit to the
///    corresponding branch with a single magnitude-1 reason and skip
///    weighted scoring. The safe annotation is checked first, so it
///    wins if both are present.
/// 2. **Step 1.5 D1.a Safe-API collapse.** If `api ==
///    DeserializeApi::Safe`, commit to **Benign / Info** regardless
///    of any other signals.
/// 3. **Step 1.5 D1.b Unsafe-API collapse.** If `api ==
///    DeserializeApi::Unsafe`, commit to **RealBug / Critical**
///    regardless of any other signals.
/// 4. **Weighted scoring** (Ambiguous / Unknown APIs only; a missing
///    `api` counts as Unknown). Sum weights for each present signal.
///    Every signal that contributes to the sum also records exactly
///    one [`PredictionReason`], so the reason weights always add up
///    to the sum.
/// 5. **Tiebreak**: sum exactly 0.0 → predict RealBug. Conservative
///    default for security findings.
///
/// # Severity mapping (D3)
///
/// - Safe-API collapse → `Severity::Info`.
/// - Unsafe-API collapse → `Severity::Critical`.
/// - Annotation `deserialize-vulnerable` → severity from
///   `severity_for_branch` (Critical via forced `-1.0` sum).
/// - Annotation `deserialize-safe` → `Severity::Info`.
/// - Predicted RealBug otherwise → severity bucketed from the
///   weighted sum.
/// - Predicted Benign otherwise → `Severity::Info`.
pub(super) fn predict(evidence: &Evidence) -> Prediction {
    let api = evidence.api.unwrap_or(DeserializeApi::Unknown);
    let api_label = api.callee_label();

    // ── Step 1: collapsing annotations. ──
    if let Some(reason) = &evidence.deserialize_safe_annotation {
        return collapse(
            BranchLabel::Benign,
            api,
            0.0,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: deserialize-safe[{reason}]"),
                },
                description: format!(
                    "`deserialize-safe[{reason}]` annotation declares this \
                     deserialize call as safe (HMAC-signed payload, \
                     restricted-unpickler wrapper, internal trusted source, \
                     etc.); the finding collapses to Info."
                ),
                example: Some(format!(
                    "{api_label}(...)  # repotoire: deserialize-safe[{reason}]"
                )),
                collapses_to: BranchLabel::Benign,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("deserialize-safe[{reason}] annotation"),
                },
                weight: 1.0,
                note: format!(
                    "Annotated as safely-wrapped ({reason}); not a \
                     deserialization risk."
                ),
            },
        );
    }
    if let Some(source) = &evidence.deserialize_vulnerable_annotation {
        return collapse(
            BranchLabel::RealBug,
            api,
            -1.0,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: deserialize-vulnerable[{source}]"),
                },
                // The analyzed code is Python, so the wording says
                // "package" rather than the Rust-specific "crate".
                description: format!(
                    "`deserialize-vulnerable[{source}]` annotation declares \
                     this deserialize as exposed (third-party package without \
                     verification, audited-untrusted, etc.); the finding \
                     stays at the existing severity."
                ),
                example: Some(format!(
                    "{api_label}(...)  # repotoire: deserialize-vulnerable[{source}]"
                )),
                collapses_to: BranchLabel::RealBug,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("deserialize-vulnerable[{source}] annotation"),
                },
                weight: -1.0,
                note: format!("Annotated as deserialize-exposed (source: {source})."),
            },
        );
    }

    // ── Step 1.5a: D1.a Safe-API Benign-direction collapse. ──
    //
    // `yaml.safe_load`, `json.loads`, `yaml.load(..., Loader=SafeLoader)`,
    // ruamel.yaml's `YAML(typ='safe')` — these APIs guarantee
    // correctness regardless of input source. Same family as 2e's
    // defusedxml collapse and 2f's advocate collapse.
    if api.collapses_safe() {
        return collapse(
            BranchLabel::Benign,
            api,
            0.0,
            ResolutionSignal {
                kind: ResolutionKind::StructuralPattern {
                    description:
                        "Safe-by-construction deserialize API (json.loads / yaml.safe_load / ...)"
                            .to_string(),
                },
                description: "The call uses a Safe-by-construction \
                     deserialization API (`json.loads`, `yaml.safe_load`, \
                     `yaml.load(..., Loader=SafeLoader)`, or ruamel.yaml's \
                     `YAML(typ='safe').load`). These APIs do not have \
                     code-execution affordances. The input source is \
                     irrelevant to the safety verdict: a Safe API on \
                     attacker-controlled input is still Safe."
                    .to_string(),
                example: Some(
                    "yaml.safe_load(request.data)  # safe regardless of source".to_string(),
                ),
                collapses_to: BranchLabel::Benign,
            },
            PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "Safe-by-construction deserialize API".to_string(),
                },
                weight: W_API_SAFE_COLLAPSE,
                note: "The call site uses an API that cannot execute code \
                       by design (json.loads / yaml.safe_load / yaml.load \
                       with explicit SafeLoader / ruamel YAML(typ='safe')). \
                       Phase 2h D1.a amendment: bidirectional Step 1.5 \
                       collapse — Benign direction."
                    .to_string(),
            },
        );
    }

    // ── Step 1.5b: D1.b Unsafe-API RealBug-direction collapse. ──
    //
    // `pickle.loads`, `pickle.load`, `marshal.loads`, `marshal.load`,
    // `yaml.load` *without* explicit safe Loader, `cPickle.loads` —
    // these APIs are unsafe by design. Pickle execution is Turing-
    // complete; YAML's default loader honors `!!python/object/apply`
    // tags. Same family as 2g's `'none'` collapse, but on API name.
    if api.collapses_unsafe() {
        return collapse(
            BranchLabel::RealBug,
            api,
            -1.0,
            ResolutionSignal {
                kind: ResolutionKind::StructuralPattern {
                    description:
                        "Unsafe-by-construction deserialize API (pickle.loads / marshal.loads / yaml.load without SafeLoader)"
                            .to_string(),
                },
                // A `\` line continuation drops the newline and the next
                // line's indentation but keeps any space written before
                // the backslash, so a word hyphenated across lines must
                // end in `-\` with no space in between.
                description: "The call uses an Unsafe-by-construction \
                     deserialization API (`pickle.loads`, `marshal.loads`, \
                     `yaml.load` without explicit `Loader=SafeLoader`, \
                     `cPickle.loads`). These APIs have code-execution \
                     affordances as their design contract: pickle is \
                     Turing-complete, marshal honors crafted code objects, \
                     default yaml.load instantiates Python objects via \
                     `!!python/object/apply` tags. No defensive coding \
                     compensates for using these APIs on attacker-\
                     controlled (or even attacker-writable local-file) \
                     input."
                    .to_string(),
                example: Some(
                    "# Replace pickle.loads with json.loads (or annotate if \
                     truly verified):\npickle.loads(data)  # repotoire: deserialize-safe[hmac-verified]"
                        .to_string(),
                ),
                collapses_to: BranchLabel::RealBug,
            },
            PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "Unsafe-by-construction deserialize API".to_string(),
                },
                weight: W_API_UNSAFE_COLLAPSE,
                // References the weakness class (CWE-502) rather than a
                // specific CVE id, so the note stays accurate for every
                // API in the Unsafe family.
                note: "The call site uses an API that grants arbitrary \
                       code execution by design (pickle.loads / pickle.load \
                       / marshal.loads / yaml.load without Loader / \
                       cPickle.loads). Phase 2h D1.b amendment: \
                       bidirectional Step 1.5 collapse — RealBug direction. \
                       This is the textbook CWE-502 (deserialization of \
                       untrusted data) failure mode."
                    .to_string(),
            },
        );
    }

    // ── Step 2: weighted scoring (Ambiguous / Unknown APIs). ──
    //
    // Invariant: every `sum +=` below is paired with exactly one pushed
    // reason carrying the same weight, whether or not the enclosing
    // function name is known.
    let mut sum: f32 = 0.0;
    let mut reasons: Vec<PredictionReason> = Vec::new();

    if evidence.user_input_nearby {
        sum += W_USER_INPUT_TO_DESERIALIZE;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "user input flows to deserialize call".to_string(),
            },
            weight: W_USER_INPUT_TO_DESERIALIZE,
            note: "User-controlled input (request.data, request.body, \
                   request.json, etc.) flows to the deserialize call. \
                   For Ambiguous APIs, this raises the prior on a real \
                   bug; for Safe APIs the Step 1.5 collapse fires first \
                   and this signal is suppressed."
                .to_string(),
        });
    }

    if evidence.enclosing_route_handler {
        sum += W_ENCLOSING_ROUTE_HANDLER;
        if let Some(fn_name) = &evidence.enclosing_function {
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "route_handler".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_ROUTE_HANDLER,
                note: "Enclosing function is a route handler (decorator \
                       or naming convention); higher prior on attacker-\
                       reachable deserialize code."
                    .to_string(),
            });
        } else {
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "enclosing route handler context".to_string(),
                },
                weight: W_ENCLOSING_ROUTE_HANDLER,
                note: "Call site is in a route-handler context.".to_string(),
            });
        }
    }

    if evidence.enclosing_upload_like {
        sum += W_ENCLOSING_UPLOAD_LIKE;
        if let Some(fn_name) = &evidence.enclosing_function {
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "upload_handler".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_UPLOAD_LIKE,
                note: "Enclosing function name suggests load/import/\
                       upload/restore — data likely crossing trust \
                       boundary."
                    .to_string(),
            });
        } else {
            // Flag set without a function name: still record why the
            // sum moved.
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "enclosing upload-like context".to_string(),
                },
                weight: W_ENCLOSING_UPLOAD_LIKE,
                note: "Call site is in a load/import/upload/restore context.".to_string(),
            });
        }
    }

    if evidence.local_file_source {
        sum += W_LOCAL_FILE_SOURCE;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "local-disk file source".to_string(),
            },
            weight: W_LOCAL_FILE_SOURCE,
            note: "The deserialize call's source is a local-disk file. \
                   Local config files are usually trusted; soft positive. \
                   Note: pickle.loads-on-local-file is already covered \
                   by the Unsafe-API D1.b collapse for the cases that \
                   matter; this signal applies only to Ambiguous APIs."
                .to_string(),
        });
    }

    if evidence.trust_boundary_name {
        sum += W_TRUST_BOUNDARY_NAME;
        if let Some(fn_name) = &evidence.enclosing_function {
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "trust_boundary".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_TRUST_BOUNDARY_NAME,
                note: "Enclosing function name contains a trust-boundary \
                       keyword (_trusted/_admin/_internal/_signed) — \
                       developer-authored signal that data has been \
                       verified."
                    .to_string(),
            });
        } else {
            // Flag set without a function name: still record why the
            // sum moved.
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "enclosing trust-boundary context".to_string(),
                },
                weight: W_TRUST_BOUNDARY_NAME,
                note: "Call site is in a trust-boundary-named context.".to_string(),
            });
        }
    }

    if let Some(fn_name) = &evidence.enclosing_function {
        if matches_test_function(fn_name) {
            sum += W_ENCLOSING_TEST_FUNCTION;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "function".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_TEST_FUNCTION,
                note: format!(
                    "Enclosing function `{fn_name}` looks like a \
                     test/fixture; test code rarely the actionable \
                     security target."
                ),
            });
        }
    }

    // ── Step 3: tiebreak + severity mapping. ──
    // Strictly positive → Benign; zero (tie) and negative → RealBug.
    let predicted = if sum > 0.0 {
        BranchLabel::Benign
    } else {
        BranchLabel::RealBug
    };

    build_prediction(predicted, api, sum, reasons, Vec::new())
}

// ────────────────────────────��────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────────────

/// True iff the lowercased function name contains any entry of
/// `TEST_FUNCTION_SUBSTRINGS` (plain substring match).
fn matches_test_function(name: &str) -> bool {
    let folded = name.to_lowercase();
    for marker in TEST_FUNCTION_SUBSTRINGS {
        if folded.contains(*marker) {
            return true;
        }
    }
    false
}

/// Case-insensitive check of a function name against the
/// route-handler naming lexicon (`ROUTE_HANDLER_NAME_SUBSTRINGS`).
/// The evidence extractor combines this with decorator detection.
pub(super) fn matches_route_handler_name(name: &str) -> bool {
    let folded = name.to_lowercase();
    for marker in ROUTE_HANDLER_NAME_SUBSTRINGS {
        if folded.contains(*marker) {
            return true;
        }
    }
    false
}

/// Case-insensitive check of a function name against the
/// load / import / upload / restore lexicon
/// (`UPLOAD_LIKE_NAME_SUBSTRINGS`).
pub(super) fn matches_upload_like_name(name: &str) -> bool {
    let folded = name.to_lowercase();
    UPLOAD_LIKE_NAME_SUBSTRINGS
        .iter()
        .copied()
        .any(|marker| folded.contains(marker))
}

/// Case-insensitive check of a function name against the
/// trust-boundary keyword lexicon (`TRUST_BOUNDARY_NAME_SUBSTRINGS`).
pub(super) fn matches_trust_boundary_name(name: &str) -> bool {
    let folded = name.to_lowercase();
    for keyword in TRUST_BOUNDARY_NAME_SUBSTRINGS {
        if folded.contains(*keyword) {
            return true;
        }
    }
    false
}

/// True iff a line (intended to be a decorator line preceding a
/// function definition), once surrounding whitespace is trimmed,
/// *starts with* any entry of `ROUTE_HANDLER_DECORATOR_SUBSTRINGS`.
/// The comparison is case-sensitive.
pub(super) fn matches_route_handler_decorator(line: &str) -> bool {
    let candidate = line.trim();
    for prefix in ROUTE_HANDLER_DECORATOR_SUBSTRINGS {
        if candidate.starts_with(*prefix) {
            return true;
        }
    }
    false
}

/// True iff the lowercased line contains any user-input identifier
/// from `USER_INPUT_NAME_SUBSTRINGS` (request.data, etc.).
pub(super) fn line_contains_user_input(line: &str) -> bool {
    let haystack = line.to_lowercase();
    for needle in USER_INPUT_NAME_SUBSTRINGS {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}

/// Build the [`Prediction`] for a collapsing signal: the given branch
/// is committed with exactly one reason and one resolution signal.
/// `forced_sum` stands in for the weighted sum when severities are
/// derived.
fn collapse(
    label: BranchLabel,
    api: DeserializeApi,
    forced_sum: f32,
    resolution: ResolutionSignal,
    reason: PredictionReason,
) -> Prediction {
    let sole_reason = vec![reason];
    let sole_resolution = vec![resolution];
    build_prediction(label, api, forced_sum, sole_reason, sole_resolution)
}

/// Assemble a [`Prediction`] for the chosen branch.
///
/// The alternative branch is always the opposite label; its severity
/// is derived from the same `sum` as the predicted branch, and its
/// title, description and suggested fix are rendered from the API's
/// display label.
fn build_prediction(
    predicted: BranchLabel,
    api: DeserializeApi,
    sum: f32,
    reasons: Vec<PredictionReason>,
    resolutions: Vec<ResolutionSignal>,
) -> Prediction {
    let other = predicted.opposite();
    let label_text = api.callee_label();

    Prediction {
        predicted,
        alternative_branch: AlternativeBranch {
            label: other,
            severity: severity_for_branch(other, sum),
            title: title_for_branch(other, label_text),
            description: description_for_branch(other, label_text),
            suggested_fix: suggested_fix_for_branch(other, label_text),
        },
        predicted_severity: severity_for_branch(predicted, sum),
        reasons,
        resolutions,
    }
}

/// D3 severity mapping.
///
/// A `Benign` label is always `Info`. A `RealBug` label is bucketed by
/// the weighted sum (collapse paths pass a forced sum):
///
/// - `sum <= -0.7` → Critical
/// - `-0.7 < sum <= -0.4` → High
/// - anything else → Medium (this covers `-0.4 < sum < 0.0`, the
///   `sum == 0.0` tiebreak, positive sums, and NaN)
fn severity_for_branch(label: BranchLabel, sum: f32) -> Severity {
    match label {
        BranchLabel::Benign => Severity::Info,
        BranchLabel::RealBug if sum <= -0.7 => Severity::Critical,
        BranchLabel::RealBug if sum <= -0.4 => Severity::High,
        BranchLabel::RealBug => Severity::Medium,
    }
}

/// Renders the one-line finding title for `label`, naming the callee
/// via `api_label`.
fn title_for_branch(label: BranchLabel, api_label: &str) -> String {
    let (lead, tail) = match label {
        BranchLabel::RealBug => ("Potential insecure", ""),
        BranchLabel::Benign => ("Safe", " (informational)"),
    };
    format!("{lead} deserialization via {api_label}{tail}")
}

/// Renders the long-form finding description for `label`. Both variants
/// open with "The `<api_label>` call appears to " and differ only in
/// the branch-specific remainder chosen below.
fn description_for_branch(label: BranchLabel, api_label: &str) -> String {
    let remainder = match label {
        BranchLabel::RealBug => concat!(
            "deserialize attacker-reachable data with an unsafe API ",
            "(`pickle.loads`, `marshal.loads`, `yaml.load` without ",
            "`SafeLoader`, ...). Insecure deserialization allows ",
            "attackers to execute arbitrary code, bypass authentication, ",
            "or access sensitive data.",
        ),
        BranchLabel::Benign => concat!(
            "use a safe-by-construction deserialization API ",
            "(`json.loads`, `yaml.safe_load`, ",
            "`yaml.load(..., Loader=SafeLoader)`). The call is carried as ",
            "Info; the RealBug interpretation is preserved in ",
            "`alternative_branch` in case the predictor is wrong.",
        ),
    };
    format!("The `{api_label}` call appears to {remainder}")
}

/// Returns the remediation text for `label`. The callee label is
/// accepted for signature parity with the other renderers but is not
/// used; both branches currently always yield `Some`.
fn suggested_fix_for_branch(label: BranchLabel, _api_label: &str) -> Option<String> {
    let advice: &str = match label {
        BranchLabel::RealBug => concat!(
            "Replace the unsafe deserialize call with a safe-by-construction ",
            "alternative:\n\n",
            "```python\n",
            "# Instead of pickle.loads(data):\n",
            "data = json.loads(text)\n",
            "\n",
            "# Instead of yaml.load(stream):\n",
            "data = yaml.safe_load(stream)\n",
            "```\n\n",
            "If the unsafe API is unavoidable (e.g. pickle for an ",
            "internal binary format), verify the payload with an HMAC ",
            "check before deserializing, OR subclass `pickle.Unpickler` ",
            "and override `find_class` to restrict allowed classes. ",
            "If this is intentional safe usage that the v0 predictor ",
            "cannot trace (cross-statement HMAC unwrap, etc.), annotate ",
            "`# repotoire: deserialize-safe[<reason>]` to collapse the ",
            "finding to Info definitively.",
        ),
        BranchLabel::Benign => concat!(
            "If this is intentional safe usage, annotate ",
            "`# repotoire: deserialize-safe[<reason>]` to collapse the ",
            "finding to Info definitively. If the alternative branch is ",
            "correct (the deserialize IS exposed to attacker-controlled ",
            "data via a path the predictor missed), audit the call's ",
            "source for user input and prefer a safe-by-construction ",
            "alternative (`json.loads`, `yaml.safe_load`).",
        ),
    };
    Some(advice.to_owned())
}

// ─────────────────────────────────────────────────────────────────────────────
// Annotation lookup helpers (called by evidence extraction)
// ─────────────────────────────────────────────────────────────────────────────

/// Extracts the reason from a `# repotoire: deserialize-safe[<reason>]`
/// annotation on `line`.
///
/// Returns `None` when the line carries no parseable annotation or the
/// annotation is of a different kind. When the annotation has no
/// argument, the reason falls back to `"unspecified"`.
pub(super) fn extract_deserialize_safe_reason(line: &str) -> Option<String> {
    parse_python_comment(line)
        .filter(|ann| ann.kind == "deserialize-safe")
        .map(|ann| {
            ann.args
                .first()
                .cloned()
                .unwrap_or_else(|| "unspecified".to_string())
        })
}

/// Extracts the source from a
/// `# repotoire: deserialize-vulnerable[<source>]` annotation on `line`.
///
/// Returns `None` when the line carries no parseable annotation or the
/// annotation is of a different kind. When the annotation has no
/// argument, the source falls back to `"unspecified"`.
pub(super) fn extract_deserialize_vulnerable_source(line: &str) -> Option<String> {
    let annotation = parse_python_comment(line)?;
    if annotation.kind == "deserialize-vulnerable" {
        let source = match annotation.args.first() {
            Some(first) => first.clone(),
            None => "unspecified".to_string(),
        };
        Some(source)
    } else {
        None
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// API classification helpers
// ─────────────────────────────────────────────────────────────────────────────

/// Classify a callee text (e.g. `pickle.loads`, `yaml.safe_load`,
/// `json.loads`) into a `DeserializeApi`. The evidence extractor calls
/// this AFTER resolving aliases (`import pickle as p` → `p.loads`
/// classifies as `pickle.loads`).
///
/// The callee is normalized (whitespace removed, lowercased) and then
/// compared against three tables, in order: Safe, Unsafe, Ambiguous. An
/// entry matches when the normalized callee equals it or ends with
/// `.<entry>` (so `ruamel.yaml.load` matches `yaml.load`, while
/// `jsonpickle.loads` does NOT match `pickle.loads`). Anything else is
/// `Unknown`.
///
/// PyYAML's convenience wrappers are classified consistently with the
/// loader they hard-wire: `yaml.unsafe_load[_all]` and
/// `yaml.full_load[_all]` are Unsafe (matching how
/// `yaml_loader_is_unsafe` treats `UnsafeLoader` / `FullLoader`), and
/// `yaml.load_all` is Ambiguous exactly like `yaml.load`.
///
/// **Note: this does NOT inspect the `Loader=` kwarg.** `yaml.load` /
/// `yaml.load_all` come back as `Ambiguous`; the evidence extractor
/// must reclassify them to `Safe` or `Unsafe` based on the kwarg value
/// before storing in `Evidence::api`. This function only sees the raw
/// callee.
pub(super) fn classify_deserialize_callee(callee: &str) -> DeserializeApi {
    // Safe APIs: never trigger code execution.
    const SAFE_CALLEES: &[&str] = &[
        "yaml.safe_load",
        "yaml.safe_load_all",
        "json.loads",
        "json.load",
        "simplejson.loads",
        "simplejson.load",
        "orjson.loads",
        "ujson.loads",
        "rapidjson.loads",
        "msgpack.unpackb", // msgpack itself is safe (no code execution)
        "msgpack.unpack",
    ];

    // Unsafe APIs: code execution by design.
    const UNSAFE_CALLEES: &[&str] = &[
        "pickle.loads",
        "pickle.load",
        "cpickle.loads",
        "cpickle.load",
        "_pickle.loads",
        "_pickle.load",
        "marshal.loads",
        "marshal.load",
        "shelve.open", // pickle-backed
        "dill.loads",
        "dill.load",
        // PyYAML wrappers that hard-wire UnsafeLoader / FullLoader.
        "yaml.unsafe_load",
        "yaml.unsafe_load_all",
        "yaml.full_load",
        "yaml.full_load_all",
    ];

    // Safety depends on the `Loader=` kwarg, which is not visible here.
    const AMBIGUOUS_CALLEES: &[&str] = &["yaml.load", "yaml.load_all"];

    /// True iff `normalized` is exactly one of `table`'s entries or ends
    /// with `.<entry>`. Uses `strip_suffix` so no per-entry string is
    /// allocated.
    fn names_any(normalized: &str, table: &[&str]) -> bool {
        table.iter().any(|entry| {
            matches!(
                normalized.strip_suffix(*entry),
                Some(prefix) if prefix.is_empty() || prefix.ends_with('.')
            )
        })
    }

    // Normalize: strip whitespace, lowercase.
    let normalized: String = callee
        .chars()
        .filter(|c| !c.is_whitespace())
        .collect::<String>()
        .to_lowercase();

    if names_any(&normalized, SAFE_CALLEES) {
        DeserializeApi::Safe
    } else if names_any(&normalized, UNSAFE_CALLEES) {
        DeserializeApi::Unsafe
    } else if names_any(&normalized, AMBIGUOUS_CALLEES) {
        DeserializeApi::Ambiguous
    } else {
        DeserializeApi::Unknown
    }
}

/// Decides whether a `Loader=` kwarg value (raw source text such as
/// `yaml.SafeLoader`, `Loader`, `CSafeLoader`, ...) names the explicit
/// safe loader. The evidence extractor uses this to reclassify
/// `yaml.load(..., Loader=SafeLoader)` from Ambiguous to Safe.
///
/// The value is trimmed and lowercased; only the text after the last
/// `.` is compared, so any module prefix is ignored.
pub(super) fn yaml_loader_is_safe(loader_value: &str) -> bool {
    let normalized = loader_value.trim().to_lowercase();
    // Keep only the final path segment (yaml.SafeLoader → safeloader).
    let class_name = match normalized.rfind('.') {
        Some(dot) => &normalized[dot + 1..],
        None => normalized.as_str(),
    };
    class_name == "safeloader" || class_name == "csafeloader"
}

/// Inspect a `Loader=` kwarg value to decide whether it indicates an
/// explicit unsafe loader (the textbook `Loader=yaml.Loader` /
/// `Loader=yaml.FullLoader` form). `FullLoader` was the PyYAML 5.1
/// "compromise" loader; it still allows code execution via
/// `!!python/object/apply` in PyYAML < 5.4 (CVE-2020-1747). For
/// safety we treat `Loader`, `FullLoader` and `UnsafeLoader` as Unsafe.
///
/// The LibYAML-backed C variants (`CLoader`, `CFullLoader`,
/// `CUnsafeLoader`) construct the same objects as their pure-Python
/// counterparts, so they are classified identically. `CSafeLoader` is
/// deliberately NOT listed here; it is handled by `yaml_loader_is_safe`.
///
/// The value is trimmed and lowercased; only the text after the last
/// `.` is compared, so any module prefix (`yaml.`) is ignored.
pub(super) fn yaml_loader_is_unsafe(loader_value: &str) -> bool {
    let lower = loader_value.trim().to_lowercase();
    let suffix = lower.rsplit('.').next().unwrap_or(&lower);
    matches!(
        suffix,
        "loader" | "fullloader" | "unsafeloader" | "cloader" | "cfullloader" | "cunsafeloader"
    )
}

// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────

// Unit tests for the predictor: collapse paths (Safe-API, Unsafe-API,
// annotations), weighted scoring on Ambiguous APIs, the severity
// tiebreak, weight sign conventions, callee / Loader classification,
// the lexicon helpers, and the annotation extractors.
#[cfg(test)]
mod tests {
    use super::*;

    // ─── Worked example A (decisions D1.a): Safe API + user input. ───
    #[test]
    fn case_a_safe_api_yaml_safe_load_collapses_to_benign() {
        // Even with user input + handler context, Safe-API collapses
        // to Benign. Without the collapse, this would score
        // -0.50 - 0.30 = -0.80 → RealBug Critical (wrong).
        let evidence = Evidence {
            api: Some(DeserializeApi::Safe),
            user_input_nearby: true,
            enclosing_route_handler: true,
            enclosing_function: Some("update_config".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        // collapse emits exactly one reason
        assert_eq!(p.reasons.len(), 1);
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::StructuralPattern { .. }
        ));
        assert_eq!(p.resolutions[0].collapses_to, BranchLabel::Benign);
    }

    // ─── Worked example B (decisions D1.b): Unsafe API + local file. ───
    #[test]
    fn case_b_unsafe_api_pickle_loads_collapses_to_realbug_critical() {
        // Even with local_file_source (+0.10), Unsafe-API collapses
        // to RealBug Critical. Without the collapse, this would score
        // +0.10 → Benign Info (wrong).
        let evidence = Evidence {
            api: Some(DeserializeApi::Unsafe),
            local_file_source: true,
            enclosing_function: Some("load_cache".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
        // The collapse path carries exactly one reason and one resolution.
        assert_eq!(p.reasons.len(), 1);
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::StructuralPattern { .. }
        ));
        assert_eq!(p.resolutions[0].collapses_to, BranchLabel::RealBug);
    }

    // ─── Worked example C: json.loads + user input + handler → Benign. ───
    #[test]
    fn case_c_safe_api_json_loads_collapses_to_benign() {
        let evidence = Evidence {
            api: Some(DeserializeApi::Safe),
            user_input_nearby: true,
            enclosing_route_handler: true,
            enclosing_function: Some("api_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    // ─── Safe-API collapse dominates every other signal ───
    #[test]
    fn safe_api_collapse_dominates_user_input_and_handler_signals() {
        let evidence = Evidence {
            api: Some(DeserializeApi::Safe),
            user_input_nearby: true,
            enclosing_route_handler: true,
            enclosing_upload_like: true,
            enclosing_function: Some("upload_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    // ─── Unsafe-API collapse dominates every other signal ───
    #[test]
    fn case_e_unsafe_api_collapse_dominates_local_file_signal() {
        let evidence = Evidence {
            api: Some(DeserializeApi::Unsafe),
            local_file_source: true,
            trust_boundary_name: true,
            enclosing_function: Some("load_admin_signed".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
    }

    // ─── Ambiguous: weighted scoring fires ───
    #[test]
    fn ambiguous_api_with_user_input_and_handler_predicts_realbug_high() {
        // -0.50 - 0.30 = -0.80, which is at or below the -0.7 threshold,
        // so the expected severity is Critical. NOTE: the test name still
        // says "high"; the assertions below are the source of truth.
        let evidence = Evidence {
            api: Some(DeserializeApi::Ambiguous),
            user_input_nearby: true,
            enclosing_route_handler: true,
            enclosing_function: Some("config_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical); // -0.80 ≤ -0.7
        let total: f32 = p.reasons.iter().map(|r| r.weight).sum();
        assert!((total + 0.80).abs() < 1e-6, "expected -0.80, got {total}");
    }

    #[test]
    fn ambiguous_api_test_function_predicts_benign() {
        // +0.15 (test) → Benign.
        let evidence = Evidence {
            api: Some(DeserializeApi::Ambiguous),
            enclosing_function: Some("test_yaml_parse".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    #[test]
    fn ambiguous_api_local_file_predicts_benign_via_soft_positive() {
        // +0.10 → Benign.
        let evidence = Evidence {
            api: Some(DeserializeApi::Ambiguous),
            local_file_source: true,
            enclosing_function: Some("read_config".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
    }

    // ─── deserialize-safe annotation collapses ───
    #[test]
    fn deserialize_safe_annotation_collapses_to_benign() {
        // Even on an Unsafe API, the annotation collapse takes
        // priority over the Unsafe-API collapse.
        let evidence = Evidence {
            api: Some(DeserializeApi::Unsafe),
            deserialize_safe_annotation: Some("hmac-verified".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::SourceAnnotation { .. }
        ));
    }

    #[test]
    fn deserialize_vulnerable_annotation_collapses_to_realbug() {
        // Even on a Safe API, the vulnerable annotation collapses to
        // RealBug. (Use case: third-party adapter that wraps json.loads
        // but rebuilds the object tree without validation.)
        let evidence = Evidence {
            api: Some(DeserializeApi::Safe),
            deserialize_vulnerable_annotation: Some("third-party-no-validation".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // Forced sum -1.0 → Critical
        assert_eq!(p.predicted_severity, Severity::Critical);
    }

    // ─── Tiebreak ───
    #[test]
    fn empty_evidence_tiebreaks_realbug() {
        let p = predict(&Evidence::empty());
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // sum=0 → Medium
        assert_eq!(p.predicted_severity, Severity::Medium);
    }

    // ─── Sign convention ───
    // RealBug-direction weights must be negative and Benign-direction
    // weights positive; the assertions are on constants by design.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn realbug_signal_weights_are_negative() {
        assert!(W_USER_INPUT_TO_DESERIALIZE < 0.0);
        assert!(W_ENCLOSING_ROUTE_HANDLER < 0.0);
        assert!(W_ENCLOSING_UPLOAD_LIKE < 0.0);
        assert!(W_API_UNSAFE_COLLAPSE < 0.0);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn benign_signal_weights_are_positive() {
        assert!(W_LOCAL_FILE_SOURCE > 0.0);
        assert!(W_ENCLOSING_TEST_FUNCTION > 0.0);
        assert!(W_TRUST_BOUNDARY_NAME > 0.0);
        assert!(W_API_SAFE_COLLAPSE > 0.0);
    }

    // ─── Honest review pin: bidirectional D1 amendment justification ───
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn d1a_amendment_required_case_a_without_collapse_predicts_realbug_critical() {
        // Pin the math: without the D1.a Safe-API collapse, Case A
        // (yaml.safe_load(request.data) in a Flask handler) would
        // additive-score to -0.50 - 0.30 = -0.80 → RealBug Critical.
        // This is the WRONG answer for a safe-by-construction API.
        let additive_sum = W_USER_INPUT_TO_DESERIALIZE + W_ENCLOSING_ROUTE_HANDLER;
        assert!(
            additive_sum <= -0.7,
            "Under additive-only, Case A would predict RealBug Critical \
             for a safe-by-construction API. This is the bug D1.a fixes. \
             Pin: {additive_sum}"
        );
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn d1b_amendment_required_case_b_without_collapse_predicts_benign() {
        // Pin the math: without the D1.b Unsafe-API collapse, Case B
        // (pickle.loads on a local file) would additive-score to
        // +0.10 → Benign Info. This is the WRONG answer for a
        // Turing-complete deserialization API (CVE-2019-20907 family).
        // NOTE(review): CVE-2019-20907 appears to be a `tarfile` issue
        // rather than a pickle one — confirm or replace the citation.
        let additive_sum = W_LOCAL_FILE_SOURCE;
        assert!(
            additive_sum > 0.0,
            "Under additive-only, Case B would predict Benign Info \
             for pickle.loads on a local file. This is the bug D1.b \
             fixes. Pin: {additive_sum}"
        );
    }

    // ─── DeserializeApi helpers ───
    #[test]
    fn deserialize_api_collapses_predicates() {
        // Only Safe collapses safe, only Unsafe collapses unsafe;
        // Ambiguous and Unknown collapse in neither direction.
        assert!(DeserializeApi::Safe.collapses_safe());
        assert!(!DeserializeApi::Safe.collapses_unsafe());
        assert!(DeserializeApi::Unsafe.collapses_unsafe());
        assert!(!DeserializeApi::Unsafe.collapses_safe());
        assert!(!DeserializeApi::Ambiguous.collapses_safe());
        assert!(!DeserializeApi::Ambiguous.collapses_unsafe());
        assert!(!DeserializeApi::Unknown.collapses_safe());
        assert!(!DeserializeApi::Unknown.collapses_unsafe());
    }

    #[test]
    fn deserialize_api_is_recognized() {
        assert!(DeserializeApi::Safe.is_recognized());
        assert!(DeserializeApi::Unsafe.is_recognized());
        assert!(DeserializeApi::Ambiguous.is_recognized());
        assert!(!DeserializeApi::Unknown.is_recognized());
    }

    #[test]
    fn deserialize_api_callee_label_is_stable() {
        // These labels are interpolated into titles and descriptions.
        assert_eq!(DeserializeApi::Safe.callee_label(), "safe-deserializer");
        assert_eq!(DeserializeApi::Unsafe.callee_label(), "unsafe-deserializer");
        assert_eq!(
            DeserializeApi::Ambiguous.callee_label(),
            "ambiguous-deserializer"
        );
        assert_eq!(DeserializeApi::Unknown.callee_label(), "deserialize client");
    }

    // ─── Callee classification ───
    #[test]
    fn classify_safe_callees() {
        assert_eq!(
            classify_deserialize_callee("yaml.safe_load"),
            DeserializeApi::Safe
        );
        assert_eq!(
            classify_deserialize_callee("yaml.safe_load_all"),
            DeserializeApi::Safe
        );
        assert_eq!(
            classify_deserialize_callee("json.loads"),
            DeserializeApi::Safe
        );
        assert_eq!(
            classify_deserialize_callee("json.load"),
            DeserializeApi::Safe
        );
        assert_eq!(
            classify_deserialize_callee("simplejson.loads"),
            DeserializeApi::Safe
        );
        assert_eq!(
            classify_deserialize_callee("orjson.loads"),
            DeserializeApi::Safe
        );
    }

    #[test]
    fn classify_unsafe_callees() {
        assert_eq!(
            classify_deserialize_callee("pickle.loads"),
            DeserializeApi::Unsafe
        );
        assert_eq!(
            classify_deserialize_callee("pickle.load"),
            DeserializeApi::Unsafe
        );
        // Mixed case exercises the lowercase normalization.
        assert_eq!(
            classify_deserialize_callee("cPickle.loads"),
            DeserializeApi::Unsafe
        );
        assert_eq!(
            classify_deserialize_callee("marshal.loads"),
            DeserializeApi::Unsafe
        );
        assert_eq!(
            classify_deserialize_callee("dill.loads"),
            DeserializeApi::Unsafe
        );
    }

    #[test]
    fn classify_yaml_load_as_ambiguous() {
        // yaml.load alone is Ambiguous; the evidence extractor inspects
        // the Loader= kwarg to reclassify to Safe or Unsafe.
        assert_eq!(
            classify_deserialize_callee("yaml.load"),
            DeserializeApi::Ambiguous
        );
    }

    #[test]
    fn classify_unknown_callee() {
        assert_eq!(
            classify_deserialize_callee("json.dumps"),
            DeserializeApi::Unknown
        );
        assert_eq!(
            classify_deserialize_callee("foo.bar"),
            DeserializeApi::Unknown
        );
    }

    // ─── yaml Loader= classification ───
    #[test]
    fn yaml_safe_loader_recognized() {
        assert!(yaml_loader_is_safe("yaml.SafeLoader"));
        assert!(yaml_loader_is_safe("SafeLoader"));
        assert!(yaml_loader_is_safe("yaml.CSafeLoader"));
        assert!(yaml_loader_is_safe("CSafeLoader"));
    }

    #[test]
    fn yaml_unsafe_loader_recognized() {
        assert!(yaml_loader_is_unsafe("yaml.Loader"));
        assert!(yaml_loader_is_unsafe("Loader"));
        assert!(yaml_loader_is_unsafe("yaml.FullLoader"));
        assert!(yaml_loader_is_unsafe("yaml.UnsafeLoader"));
    }

    #[test]
    fn yaml_loader_classifications_disjoint() {
        // A loader value must never be classified as both safe and unsafe.
        assert!(!yaml_loader_is_safe("yaml.Loader"));
        assert!(!yaml_loader_is_unsafe("yaml.SafeLoader"));
    }

    // ─── Lexicon helpers ───
    #[test]
    fn route_handler_decorator_matches() {
        assert!(matches_route_handler_decorator("@app.route('/foo')"));
        // Leading indentation is trimmed before matching.
        assert!(matches_route_handler_decorator("    @app.post('/x')"));
        assert!(matches_route_handler_decorator("@router.get('/v1')"));
        assert!(matches_route_handler_decorator("@blueprint.route('/x')"));
        assert!(!matches_route_handler_decorator("@dataclass"));
    }

    #[test]
    fn route_handler_name_matches() {
        assert!(matches_route_handler_name("login_handler"));
        assert!(matches_route_handler_name("api_endpoint"));
        assert!(matches_route_handler_name("user_view"));
        assert!(!matches_route_handler_name("compute_total"));
    }

    #[test]
    fn upload_like_name_matches() {
        assert!(matches_upload_like_name("upload_file"));
        assert!(matches_upload_like_name("import_data"));
        assert!(matches_upload_like_name("load_config"));
        assert!(matches_upload_like_name("restore_session"));
        assert!(matches_upload_like_name("ingest_payload"));
        assert!(!matches_upload_like_name("compute"));
    }

    #[test]
    fn trust_boundary_name_matches() {
        assert!(matches_trust_boundary_name("parse_trusted"));
        assert!(matches_trust_boundary_name("load_admin_data"));
        assert!(matches_trust_boundary_name("read_internal_state"));
        assert!(matches_trust_boundary_name("decode_signed_blob"));
        assert!(!matches_trust_boundary_name("parse_input"));
    }

    #[test]
    fn line_contains_user_input_matches() {
        assert!(line_contains_user_input("data = request.data"));
        assert!(line_contains_user_input("    return request.json"));
        assert!(line_contains_user_input("body = request.body"));
        assert!(!line_contains_user_input("compute(local_var)"));
    }

    // ─── Extract helpers ───
    #[test]
    fn extract_deserialize_safe_with_reason() {
        assert_eq!(
            extract_deserialize_safe_reason(
                "pickle.loads(data)  # repotoire: deserialize-safe[hmac-verified]"
            ),
            Some("hmac-verified".to_string())
        );
    }

    #[test]
    fn extract_deserialize_safe_without_reason() {
        // No `[...]` argument → the reason defaults to "unspecified".
        assert_eq!(
            extract_deserialize_safe_reason("yaml.load(blob)  # repotoire: deserialize-safe"),
            Some("unspecified".to_string())
        );
    }

    #[test]
    fn extract_deserialize_vulnerable_with_source() {
        assert_eq!(
            extract_deserialize_vulnerable_source(
                "cattrs.structure(blob, cls)  # repotoire: deserialize-vulnerable[third-party]"
            ),
            Some("third-party".to_string())
        );
    }

    #[test]
    fn extract_deserialize_safe_ignores_other_kinds() {
        // Cross-kind isolation: our parser only honors deserialize-* kinds.
        assert_eq!(
            extract_deserialize_safe_reason("subprocess.run(...)  # repotoire: command-static[ok]"),
            None
        );
        assert_eq!(
            extract_deserialize_safe_reason("jwt.decode(...)  # repotoire: jwt-safe[ok]"),
            None
        );
        assert_eq!(
            extract_deserialize_safe_reason("ET.parse(blob)  # repotoire: xxe-safe[ok]"),
            None
        );
    }

    #[test]
    fn extract_deserialize_vulnerable_ignores_other_kinds() {
        // Annotations of another detector's kind must not be picked up.
        assert_eq!(
            extract_deserialize_vulnerable_source(
                "jwt.decode(...)  # repotoire: jwt-vulnerable[alg-from-header]"
            ),
            None
        );
    }
}