repotoire 0.9.0

//! Dual-branch predictor for Python SQL-injection call sites.
//!
//! Implements decisions D1 (weights, with **trifecta** Step 1.5 collapses:
//! D1.a Parameterized/ORM Benign collapse + D1.b String-formatted RealBug
//! collapse + D1.c `.raw()` Escape-Hatch RealBug collapse) and D3
//! (severity) from
//! `docs/superpowers/specs/2026-05-09-dual-branch-phase2-sql-injection-decisions.md`.
//!
//! # What this module does
//!
//! Given a Python SQL call site (`cursor.execute(...)`,
//! `Model.objects.raw(...)`, `db.execute(text(...), {...})`,
//! `Model.objects.filter(...)`, etc.), produce a [`Prediction`] that:
//!
//! 1. Picks `RealBug` or `Benign` as the predicted branch.
//! 2. Carries the other branch as the alternative.
//! 3. Lists typed [`PredictionReason`]s the predictor used.
//! 4. Optionally lists [`ResolutionSignal`]s (collapsing or hint-grade).
//!
//! # Trifecta Step 1.5 collapse — the structural shape of 2j
//!
//! Phase 2j extends 2i's bidirectional collapse pattern with the
//! cleanest Safe/Unsafe AST split in the series:
//!
//! - **D1.a — Parameterized/ORM Benign-direction collapse.** When the
//!   call site is `cursor.execute(<string literal>, <values>)`, an
//!   ORM keyword-filter expression (`Model.objects.filter/get/create/...`),
//!   or `text(<literal>) + bound-params dict`, commit to **Benign /
//!   Info** regardless of additive signals. Same family as 2i's
//!   typed-value pymongo collapse.
//!
//! - **D1.b — String-formatted RealBug-direction collapse.** When the
//!   call's first argument is an f-string with SQL keywords, a string
//!   concatenation expression, a `.format()` method call, or a
//!   `%`-operator expression with SQL keywords, commit to **RealBug /
//!   Critical**. Same family as 2g's `algorithms='none'` and 2h's
//!   `pickle.loads`.
//!
//! - **D1.c — `.raw()` Escape-Hatch RealBug-direction collapse (THE
//!   HEADLINE).** When the call site is `<Model>.objects.raw(<formatted
//!   SQL>)` — the Django ORM raw-SQL escape hatch invoked with
//!   f-string / concat / `.format()` user input — commit to **RealBug /
//!   Critical**. Django developers commonly think `.raw()` is safe
//!   because "it's on the ORM"; the dual-branch's contribution is the
//!   explanatory `resolution_signal` distinguishing `.raw()` from
//!   `.filter()`.
//!
//! # Architectural framing: 2j is AST-dominant
//!
//! Phase 2j is the **fourth architectural use case** for dual-branch
//! (see decisions doc §6 + §7):
//!
//! 1. **2e/2f**: Benign-collapse-only (library identity).
//! 2. **2g/2h**: Bidirectional, both directions surface new findings.
//! 3. **2i**: Bidirectional, primarily FP reduction.
//! 4. **2j** (this phase): **AST-dominant.** Three Step 1.5 collapses
//!    cover the AST-classifiable majority; the weighted-sum path is a
//!    thin tiebreaker for the small Ambiguous bucket.
//!
//! # The D5.2 honest-review-driven UserInputSource framing
//!
//! Unlike 2i (where TypedString = +0.20 and UnstructuredJson = -0.30
//! were asymmetric because pymongo's BSON serialization made Python's
//! `str` typing load-bearing), 2j collapses both source families to
//! the same magnitude (-0.30 each) because the source's Python type
//! is **not load-bearing for SQL** — f-string interpolation flattens
//! both `request.form` (str) and `request.json` (dict) into the same
//! SQL string. The split is preserved for lexicon consistency with
//! 2i but does not carry separate weights. See D5.2.
//!
//! # Sign convention
//!
//! `weight > 0` leans **Benign**; `weight < 0` leans **RealBug**.
//!
//! # Severity mapping (D3)
//!
//! - Predicted **RealBug** via D1.b string-formatted collapse → `Critical`.
//! - Predicted **RealBug** via D1.c `.raw()` collapse → `Critical`.
//! - Predicted **RealBug** otherwise → `Critical` (`sum <= -0.7`),
//!   `High` (`-0.7 < sum <= -0.4`), `Medium` (shallow negative or tiebreak).
//! - Predicted **Benign** (via D1.a collapse OR weighted-positive)
//!   → `Severity::Info`.
//!
//! # Why these weights
//!
//! See decision **D1** (with §6 D1 amendment for the trifecta
//! collapse). Numbers tagged `TUNABLE`. Phase 3 misprediction logging
//! is the right place to retune.

use super::annotation::parse_python_comment;
use crate::dual_branch::{
    AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind, ResolutionKind,
    ResolutionSignal,
};
use crate::models::Severity;

// ─────────────────────────────────────────────────────────────────────────────
// SqlApi — the API-classification enum for Phase 2j
// ─────────────────────────────────────────────────────────────────────────────

/// Which structural shape the SQL call site has, and what safety
/// contract that shape provides.
///
/// The classification drives the Step 1.5 trifecta collapses:
/// `Safe` → D1.a Benign collapse; `Unsafe` → D1.b RealBug collapse;
/// `UnsafeRaw` → D1.c RealBug collapse (Django `.raw()` escape hatch);
/// `Ambiguous` → fall through to weighted scoring.
///
/// `Unknown` triggers none of the three collapses either, so it also
/// reaches the weighted-scoring path in `predict`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum SqlApi {
    /// Parameterized SQL / ORM filter expression. Examples:
    /// - `cursor.execute("SELECT ... %s", (val,))`
    /// - `cursor.execute("SELECT ... ?", [val])`
    /// - `Model.objects.filter(id=val)` / `.get(...)` / `.create(...)`
    /// - `db.execute(text("SELECT :id"), {"id": val})`
    /// - `session.query(Model).filter(Model.id == val)`
    ///
    /// Triggers D1.a Benign-direction collapse.
    Safe,
    /// String-formatted SQL: f-string / `+`-concat / `.format()` /
    /// `%`-operator with SQL keywords inside the call's first
    /// argument. Triggers D1.b RealBug-direction collapse.
    Unsafe,
    /// Django `.raw()` escape hatch with formatted SQL — the Phase 2j
    /// headline distinction. `Model.objects.raw(...)` is the
    /// developer-controlled raw-SQL ORM method; combined with
    /// f-string / concat / `.format()` user input, this is the
    /// textbook SQL-injection shape. Triggers D1.c RealBug-direction
    /// collapse.
    UnsafeRaw,
    /// Recognized SQL sink call (`cursor.execute`, `db.execute`,
    /// `Model.objects.raw`, etc.) but the first argument is neither a
    /// clean parameterized pair (D1.a) nor a formatted SQL expression
    /// (D1.b/D1.c). Examples: opaque variable (`cursor.execute(q)`),
    /// `.raw()` with a static literal, cross-statement assembly.
    /// Falls through to weighted scoring.
    Ambiguous,
    /// SQL call whose shape was not classified. `predict` substitutes
    /// this variant when `Evidence::api` is `None`. It takes the same
    /// weighted-scoring path as `Ambiguous`; it is kept as a distinct
    /// variant so it can carry its own label (`"sql-call"`).
    Unknown,
}

impl SqlApi {
    /// Stable, kebab-case label describing the call shape, suitable for
    /// titles and descriptions.
    pub(super) fn callee_label(self) -> &'static str {
        match self {
            Self::Safe => "parameterized-sql-call",
            Self::Unsafe => "string-formatted-sql-call",
            Self::UnsafeRaw => "orm-raw-escape-hatch-call",
            Self::Ambiguous => "ambiguous-sql-call",
            Self::Unknown => "sql-call",
        }
    }

    /// Whether the shape was classified at all, i.e. is anything other
    /// than [`SqlApi::Unknown`].
    ///
    /// Compiled only for test builds. Per decisions D5.1, recognized
    /// Python sites get the predictor-aware shape while non-Python and
    /// unrecognized calls stay on the legacy regex scanner.
    #[cfg(test)]
    pub(super) fn is_recognized(self) -> bool {
        self != Self::Unknown
    }

    /// Whether this shape commits to the D1.a Parameterized/ORM Benign
    /// collapse (only [`SqlApi::Safe`] does).
    pub(super) fn collapses_safe(self) -> bool {
        self == Self::Safe
    }

    /// Whether this shape commits to the D1.b String-formatted RealBug
    /// collapse (only [`SqlApi::Unsafe`] does).
    pub(super) fn collapses_unsafe(self) -> bool {
        self == Self::Unsafe
    }

    /// Whether this shape commits to the D1.c `.raw()` Escape-Hatch
    /// RealBug collapse (only [`SqlApi::UnsafeRaw`] does).
    pub(super) fn collapses_unsafe_raw(self) -> bool {
        self == Self::UnsafeRaw
    }
}

/// The two user-input source families, preserved for lexicon
/// consistency with Phase 2i but with **collapsed magnitudes** for
/// Phase 2j (per D5.2 honest-review finding).
///
/// In 2i, pymongo's BSON serialization made Python's `str` typing
/// load-bearing — `TypedString` was +0.20 and `UnstructuredJson` was
/// -0.30. For SQL injection, the source's Python type is not
/// load-bearing: f-string interpolation flattens both into the same
/// SQL string. Both source families weigh in negatively (-0.30 each)
/// in 2j; the enum split is preserved so future Phase 3 calibration
/// can differentiate them if signal-tuning surfaces evidence.
///
/// The `Default` value is [`UserInputSource::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(super) enum UserInputSource {
    /// `request.form`, `request.args`, `request.values`, explicit
    /// `str(...)` cast of user input.
    ///
    /// Scored with `W_USER_INPUT_TYPED_STRING_NEARBY` in `predict`.
    TypedString,
    /// `request.json`, `request.get_json()`, `request.body`, raw
    /// `json.loads(request.data)`.
    ///
    /// Scored with `W_USER_INPUT_UNSTRUCTURED_JSON_NEARBY` in `predict`.
    UnstructuredJson,
    /// No user-input identifier detected within ±10 lines of the call.
    /// Contributes neither weight nor reason in `predict`.
    #[default]
    None,
}

// ─────────────────────────────────────────────────────────────────────────────
// Tunable weights
// ─────────────────────────────────────────────────────────────────────────────

// TUNABLE: see Phase 3 misprediction logging.
//
// Sign convention: positive leans Benign, negative leans RealBug.
//
// Calibration target (per decisions doc §6 worked examples Case A-H):
//   A. cursor.execute("SELECT ... %s", (req.form['id'],)):
//      Safe collapse → Benign Info. ✅
//   B. cursor.execute(f"SELECT ... {req.form['id']}"):
//      Unsafe collapse → RealBug Critical. ✅
//   C. cursor.execute("SELECT ... {}".format(int(req.form['id']))):
//      Unsafe collapse → RealBug Critical. v0 D5.3 over-fires.
//   D. User.objects.filter(id=req.GET['id']):
//      Safe collapse → Benign Info. ✅
//   E. User.objects.raw("SELECT ... " + req.GET['id']):
//      UnsafeRaw collapse → RealBug Critical. ✅ HEADLINE.
//   F. cursor.execute("INSERT INTO log VALUES ('static')"):
//      Safe collapse (static literal, no formatting) → Benign Info. ✅
//   G. cursor.execute(query) opaque variable:
//      Ambiguous → -0.30 (UserInputSource) - 0.20 (handler) = -0.50
//      → RealBug High. ✅ (or Medium when no nearby user input.)
//   H. db.execute(text("SELECT :id"), {"id": req.form['id']}):
//      Safe collapse → Benign Info. ✅

/// Informational weight for the D1.a Parameterized/ORM Step 1.5 Benign-
/// direction collapse.
///
/// Recorded as the `weight` of the collapse's `PredictionReason` in
/// `predict`; the collapse returns before weighted scoring, so this
/// value is never added to the running sum.
pub(super) const W_API_SAFE_COLLAPSE: f32 = 1.0;

/// Informational weight for the D1.b String-formatted Step 1.5 RealBug-
/// direction collapse.
///
/// Recorded on the collapse's `PredictionReason` only; never summed.
pub(super) const W_API_UNSAFE_COLLAPSE: f32 = -1.0;

/// Informational weight for the D1.c `.raw()` Escape-Hatch Step 1.5
/// RealBug-direction collapse.
///
/// Recorded on the collapse's `PredictionReason` only; never summed.
pub(super) const W_API_UNSAFE_RAW_COLLAPSE: f32 = -1.0;

/// User input from a TypedString source. Negative — even string-typed
/// input is dangerous when interpolated into a SQL string (unlike 2i
/// where pymongo's BSON typing made TypedString positive). See D5.2.
pub(super) const W_USER_INPUT_TYPED_STRING_NEARBY: f32 = -0.30;

/// User input from an UnstructuredJson source. Negative — same
/// magnitude as TypedString because f-string interpolation flattens
/// both into the same SQL string. The enum split is preserved for
/// lexicon consistency with 2i. See D5.2.
pub(super) const W_USER_INPUT_UNSTRUCTURED_JSON_NEARBY: f32 = -0.30;

/// First argument is a string literal containing SQL keywords with NO
/// f-string / concat / format / `%` interpolation. Strong Benign
/// signal: developer wrote a literal SQL string.
///
/// Applied when `Evidence::static_sql_string_literal` is set.
pub(super) const W_STATIC_SQL_STRING_LITERAL: f32 = 0.40;

/// Line contains SQL keyword (SELECT/INSERT/UPDATE/DELETE) but no
/// formatting marker in the call's first argument. Soft Benign.
///
/// Applied when `Evidence::sql_keyword_no_formatting` is set.
pub(super) const W_SQL_KEYWORD_NO_FORMATTING: f32 = 0.10;

/// Enclosing function is a route handler. Negative; lighter than
/// 2e–2h's `-0.30` because the AST classification is doing most of
/// the work — the handler prior is a thin tiebreaker.
///
/// Applied when `Evidence::enclosing_route_handler` is set.
pub(super) const W_ENCLOSING_ROUTE_HANDLER: f32 = -0.20;

/// Enclosing function looks like a test fixture. Positive (mirrors
/// 2a-2i).
///
/// Applied when the enclosing function name passes
/// `matches_test_function`.
pub(super) const W_ENCLOSING_TEST_FUNCTION: f32 = 0.15;

/// Enclosing function name contains `_trusted`, `_admin`, `_internal`,
/// `_validated`, `_signed`. Positive (soft).
///
/// Applied when `Evidence::trust_boundary_name` is set.
pub(super) const W_TRUST_BOUNDARY_NAME: f32 = 0.10;

// ─────────────────────────────────────────────────────────────────────────────
// Lexicons used by source-classification helpers
// ─────────────────────────────────────────────────────────────────────────────

/// User-input identifier substrings for the `TypedString` source family.
///
/// `classify_user_input_source` lowercases both the line and each entry
/// before comparing, so matching is case-insensitive. Note that
/// `request.GET` therefore folds to `request.get`, which is a prefix of
/// `request.get_json`; the classifier checks the `UnstructuredJson`
/// lexicon first, so such lines resolve to `UnstructuredJson`.
pub(super) const TYPED_STRING_USER_INPUT_SUBSTRINGS: &[&str] = &[
    "request.form",
    "request.args",
    "request.values",
    "request.cookies",
    "request.headers",
    "request.path_params",
    "request.GET",
    "request.POST",
];

/// User-input identifier substrings for the `UnstructuredJson` source family.
///
/// Matched case-insensitively by `classify_user_input_source`, and
/// checked before the `TypedString` lexicon.
pub(super) const UNSTRUCTURED_JSON_USER_INPUT_SUBSTRINGS: &[&str] = &[
    "request.json",
    "request.get_json",
    "request.body",
    "request.data",
    "flask.request.json",
    "self.request.body",
];

/// Route-handler decorator substrings (line-level check on decorator
/// lines preceding the function definition).
///
/// `matches_route_handler_decorator` tests each entry as a *prefix* of
/// the whitespace-trimmed line (case-sensitive), not as an arbitrary
/// substring.
pub(super) const ROUTE_HANDLER_DECORATOR_SUBSTRINGS: &[&str] = &[
    "@app.route",
    "@app.get",
    "@app.post",
    "@app.put",
    "@app.delete",
    "@router.get",
    "@router.post",
    "@router.put",
    "@router.delete",
    "@view",
    "@api_view",
    "@require_http_methods",
    "@csrf_exempt",
    "@login_required",
    "@blueprint.route",
];

/// Function-name substrings that suggest a route handler.
///
/// Matched against the lowercased function name by
/// `matches_route_handler_name`.
pub(super) const ROUTE_HANDLER_NAME_SUBSTRINGS: &[&str] =
    &["_handler", "_endpoint", "_view", "_route"];

/// Function-name substrings suggesting a trust boundary has already
/// been crossed.
///
/// Matched against the lowercased function name by
/// `matches_trust_boundary_name`.
pub(super) const TRUST_BOUNDARY_NAME_SUBSTRINGS: &[&str] =
    &["_trusted", "_admin", "_internal", "_validated", "_signed"];

/// Substrings that identify test code. Mirrors 2a–2i.
///
/// Matched against the lowercased function name by
/// `matches_test_function`.
const TEST_FUNCTION_SUBSTRINGS: &[&str] = &["test_", "_test", "fixture", "setup", "teardown"];

/// SQL keywords used for `W_SQL_KEYWORD_NO_FORMATTING`.
pub(super) const SQL_KEYWORDS: &[&str] = &[
    "SELECT", "INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER", "TRUNCATE", "EXEC",
    "EXECUTE",
];

/// Django ORM "safe" keyword-filter methods: each takes keyword
/// arguments that the ORM safely binds, NOT raw SQL.
pub(super) const SAFE_DJANGO_ORM_METHODS: &[&str] = &[
    "filter", "get", "exclude", "create", "update", "delete", "all", "none", "first", "last",
    "count", "exists",
];

/// Django ORM "unsafe escape hatch" methods: take raw SQL strings.
/// Combined with formatting → D1.c UnsafeRaw collapse.
pub(super) const UNSAFE_RAW_DJANGO_ORM_METHODS: &[&str] = &["raw", "extra"];

/// SQLAlchemy / generic SQL sink method names: take a SQL string as
/// the first positional argument.
pub(super) const SQL_SINK_METHODS: &[&str] = &[
    "execute",
    "executemany",
    "executescript",
    "scalar",
    "scalars",
    "from_statement",
    "mogrify",
    "run_sql",
    "execute_sql",
    "query",
];

/// SQLAlchemy `text()` callable name — recognized as a parameterized-
/// SQL constructor when paired with a bound-params dict in the
/// enclosing `db.execute(text(<literal>), {<binds>})` call.
pub(super) const SQLALCHEMY_TEXT_FUNCTION_NAMES: &[&str] = &["text", "literal_column"];

// ─────────────────────────────────────────────────────────────────────────────
// Evidence
// ─────────────────────────────────────────────────────────────────────────────

/// Structured evidence extracted from a Python SQL-injection call
/// site.
///
/// Populated by `evidence::extract_python_evidence` (Commit 4) and
/// consumed by [`predict`].
///
/// `Default` yields an evidence record with no classification, no
/// enclosing scope, no user-input source and every flag cleared.
#[derive(Debug, Clone, Default, PartialEq)]
pub(super) struct Evidence {
    /// Which SQL call shape the site exhibits, post-structural-
    /// classification. `None` is treated as [`SqlApi::Unknown`] by
    /// [`predict`].
    pub api: Option<SqlApi>,

    /// The raw callee text (`cursor.execute`, `User.objects.raw`,
    /// `db.execute`, etc.). Used for the title/description; the
    /// predictor itself only reads `api`.
    pub callee_label: Option<String>,

    /// Name of the enclosing function, if any. [`predict`] uses it for
    /// the test-function check and to name scope-based reasons.
    pub enclosing_function: Option<String>,

    /// Name of the enclosing class, if any (informational; no weight).
    pub enclosing_class: Option<String>,

    /// File path string, used for diagnostics; no weight in this phase.
    pub file_path: Option<String>,

    /// Which user-input source family appears within ±10 lines of the
    /// call. See [`UserInputSource`].
    pub user_input_source: UserInputSource,

    /// Enclosing function is decorated with a recognized route-handler
    /// decorator (e.g. `@app.route`) OR has a recognized handler name.
    /// Triggers [`W_ENCLOSING_ROUTE_HANDLER`].
    pub enclosing_route_handler: bool,

    /// Enclosing function name suggests a trust boundary has already
    /// been crossed. Triggers [`W_TRUST_BOUNDARY_NAME`].
    pub trust_boundary_name: bool,

    /// First argument of the call is a string literal containing SQL
    /// keywords with NO formatting marker. Triggers
    /// [`W_STATIC_SQL_STRING_LITERAL`].
    pub static_sql_string_literal: bool,

    /// Line contains a SQL keyword AND the call's first argument
    /// shows no formatting. Triggers
    /// [`W_SQL_KEYWORD_NO_FORMATTING`].
    pub sql_keyword_no_formatting: bool,

    /// `Some(reason)` if a `# repotoire: sql-safe[<reason>]`
    /// annotation appears on the call line. **Collapsing**: checked
    /// before any structural classification and before the
    /// `sql-vulnerable` annotation.
    pub sql_safe_annotation: Option<String>,

    /// `Some(source)` if a `# repotoire: sql-vulnerable[<source>]`
    /// annotation appears on the call line. **Collapsing**: checked
    /// after `sql_safe_annotation` and before any structural
    /// classification.
    pub sql_vulnerable_annotation: Option<String>,
}

impl Evidence {
    /// Test-only constructor: an evidence record with every field at
    /// its `Default` value.
    #[cfg(test)]
    pub(super) fn empty() -> Self {
        Default::default()
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Prediction
// ─────────────────────────────────────────────────────────────────────────────

/// Outcome of running the predictor over one SQL call site.
///
/// Returned by [`predict`].
#[derive(Debug, Clone)]
pub(super) struct Prediction {
    /// The branch the predictor committed to.
    pub predicted: BranchLabel,
    /// The branch that was not picked, carried as the alternative.
    pub alternative_branch: AlternativeBranch,
    /// Severity assigned to the predicted branch.
    pub predicted_severity: Severity,
    /// Typed reasons the predictor used to reach its decision.
    pub reasons: Vec<PredictionReason>,
    /// Resolution signals attached to the prediction (may be empty).
    pub resolutions: Vec<ResolutionSignal>,
}

// ─────────────────────────────────────────────────────────────────────────────
// Scorer
// ─────────────────────────────────────────────────────────────────────────────

/// Build a [`Prediction`] from extracted [`Evidence`].
///
/// A missing `evidence.api` is treated as [`SqlApi::Unknown`].
///
/// # Algorithm
///
/// 1. **Collapsing annotations first.** `sql_safe_annotation` →
///    Benign; `sql_vulnerable_annotation` → RealBug. The safe
///    annotation wins if both are present.
/// 2. **Step 1.5 D1.a Parameterized/ORM Benign collapse.** If `api ==
///    SqlApi::Safe`, commit to **Benign / Info**.
/// 3. **Step 1.5 D1.b String-formatted RealBug collapse.** If `api ==
///    SqlApi::Unsafe`, commit to **RealBug / Critical**.
/// 4. **Step 1.5 D1.c `.raw()` Escape-Hatch RealBug collapse.** If
///    `api == SqlApi::UnsafeRaw`, commit to **RealBug / Critical**.
/// 5. **Weighted scoring** (Ambiguous / Unknown only). Sum weights.
///    Every weight that is added to the sum is accompanied by a
///    [`PredictionReason`] carrying that same weight, so the reasons
///    list always accounts for the full sum.
/// 6. **Tiebreak**: sum exactly 0.0 → predict RealBug. Conservative
///    default for security findings. (Only a strictly positive sum
///    predicts Benign.)
pub(super) fn predict(evidence: &Evidence) -> Prediction {
    let api = evidence.api.unwrap_or(SqlApi::Unknown);
    let api_label = api.callee_label();

    // ── Step 1: collapsing annotations. ──
    if let Some(reason) = &evidence.sql_safe_annotation {
        return collapse(
            BranchLabel::Benign,
            api,
            0.0,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: sql-safe[{reason}]"),
                },
                description: format!(
                    "`sql-safe[{reason}]` annotation declares this \
                     SQL call site as safe (whitelisted table name, \
                     audited internal source, cross-statement validated, \
                     etc.); the finding collapses to Info."
                ),
                example: Some(format!("{api_label}(...)  # repotoire: sql-safe[{reason}]")),
                collapses_to: BranchLabel::Benign,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("sql-safe[{reason}] annotation"),
                },
                weight: 1.0,
                note: format!(
                    "Annotated as safely-constructed ({reason}); not a SQL injection risk."
                ),
            },
        );
    }
    if let Some(source) = &evidence.sql_vulnerable_annotation {
        return collapse(
            BranchLabel::RealBug,
            api,
            -1.0,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: sql-vulnerable[{source}]"),
                },
                description: format!(
                    "`sql-vulnerable[{source}]` annotation declares this \
                     SQL call site as exposed (third-party shim, dynamic \
                     query builder the predictor can't trace, audited-\
                     untrusted, etc.); the finding stays at the existing \
                     severity."
                ),
                example: Some(format!(
                    "{api_label}(...)  # repotoire: sql-vulnerable[{source}]"
                )),
                collapses_to: BranchLabel::RealBug,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("sql-vulnerable[{source}] annotation"),
                },
                weight: -1.0,
                note: format!("Annotated as sql-exposed (source: {source})."),
            },
        );
    }

    // ── Step 1.5a: D1.a Parameterized/ORM Benign-direction collapse. ──
    if api.collapses_safe() {
        return collapse(
            BranchLabel::Benign,
            api,
            0.0,
            ResolutionSignal {
                kind: ResolutionKind::StructuralPattern {
                    description: "Parameterized SQL call / ORM filter expression".to_string(),
                },
                description: "The SQL call site uses a parameterized \
                     API: a string-literal query argument paired with a \
                     bound-values argument (`cursor.execute(\"SELECT ... \
                     %s\", (val,))`), an ORM keyword-filter expression \
                     (`Model.objects.filter(id=val)`), or a SQLAlchemy \
                     `text()` + bound-params dict pair \
                     (`db.execute(text(\"SELECT :id\"), {\"id\": val})`). \
                     The driver / ORM separates the SQL grammar from the \
                     bound values — user input cannot become part of the \
                     SQL string. The call is safe by structural \
                     construction."
                    .to_string(),
                example: Some(
                    "cursor.execute(\"SELECT * FROM users WHERE id = %s\", (user_id,))  # safe"
                        .to_string(),
                ),
                collapses_to: BranchLabel::Benign,
            },
            PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "Parameterized SQL / ORM filter".to_string(),
                },
                weight: W_API_SAFE_COLLAPSE,
                note: "The call site is a parameterized SQL or ORM \
                       keyword-filter expression: the driver / ORM binds \
                       values separately from the SQL grammar. Phase 2j \
                       D1.a amendment: trifecta Step 1.5 collapse — \
                       Benign direction. This is the AST-dominant Safe \
                       classification."
                    .to_string(),
            },
        );
    }

    // ── Step 1.5b: D1.b String-formatted RealBug-direction collapse. ──
    if api.collapses_unsafe() {
        return collapse(
            BranchLabel::RealBug,
            api,
            -1.0,
            ResolutionSignal {
                kind: ResolutionKind::StructuralPattern {
                    description: "String-formatted SQL (f-string / concat / format / %)"
                        .to_string(),
                },
                description: "The SQL call's first argument is an \
                     f-string, a string concatenation expression, a \
                     `.format()` method call, or a `%`-operator \
                     expression that interpolates user input into the \
                     SQL grammar. The interpolated value becomes part \
                     of the SQL string at runtime — there is no \
                     parameterization. The textbook CWE-89 SQL \
                     injection shape."
                    .to_string(),
                example: Some(
                    "cursor.execute(f\"SELECT * FROM users WHERE id = {request.form['id']}\")"
                        .to_string(),
                ),
                collapses_to: BranchLabel::RealBug,
            },
            PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "String-formatted SQL argument".to_string(),
                },
                weight: W_API_UNSAFE_COLLAPSE,
                note: "The SQL call's first argument uses f-string / \
                       `+`-concat / `.format()` / `%`-operator with \
                       SQL keywords. Phase 2j D1.b amendment: trifecta \
                       Step 1.5 collapse — RealBug direction via \
                       string-formatted-SQL structural pattern. \
                       Textbook CWE-89."
                    .to_string(),
            },
        );
    }

    // ── Step 1.5c: D1.c `.raw()` Escape-Hatch RealBug-direction collapse. ──
    if api.collapses_unsafe_raw() {
        return collapse(
            BranchLabel::RealBug,
            api,
            -1.0,
            ResolutionSignal {
                kind: ResolutionKind::StructuralPattern {
                    description: "ORM .raw() escape hatch with formatted SQL".to_string(),
                },
                description: "The call site is `<Model>.objects.raw(<formatted \
                     SQL>)` — the Django ORM raw-SQL escape hatch \
                     invoked with f-string / concat / `.format()` user \
                     input. Django developers commonly think `.raw()` \
                     is safe because \"it's on the ORM,\" but `.raw()` \
                     takes a raw SQL string and applies no \
                     parameterization beyond what the developer wires \
                     in. Combined with string interpolation, `.raw()` \
                     is exactly the textbook SQL-injection shape — \
                     distinct from `.filter()` / `.get()` which use \
                     keyword-bound values."
                    .to_string(),
                example: Some(
                    "User.objects.raw(\"SELECT * FROM users WHERE id = \" + request.GET['id'])"
                        .to_string(),
                ),
                collapses_to: BranchLabel::RealBug,
            },
            PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "Django .raw() escape hatch with formatted SQL".to_string(),
                },
                weight: W_API_UNSAFE_RAW_COLLAPSE,
                note: "The call uses `<Model>.objects.raw(...)` with \
                       formatted SQL (f-string / concat / format / %). \
                       Phase 2j D1.c amendment: trifecta Step 1.5 \
                       collapse — RealBug direction via Django ORM \
                       raw-SQL escape hatch. This is the Phase 2j \
                       headline distinction: `.raw()` is NOT the same \
                       safety contract as `.filter()`."
                    .to_string(),
            },
        );
    }

    // ── Step 2: weighted scoring (Ambiguous / Unknown shapes). ──
    let mut sum: f32 = 0.0;
    let mut reasons: Vec<PredictionReason> = Vec::new();

    match evidence.user_input_source {
        UserInputSource::TypedString => {
            sum += W_USER_INPUT_TYPED_STRING_NEARBY;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description:
                        "user input from typed-string source (request.form / request.args)"
                            .to_string(),
                },
                weight: W_USER_INPUT_TYPED_STRING_NEARBY,
                note: "User input from a typed-string source \
                       (`request.form`, `request.args`, `request.GET`). \
                       Unlike Phase 2i (where pymongo's BSON \
                       serialization made TypedString safer), for SQL \
                       the source's Python type is not load-bearing — \
                       f-string interpolation flattens both TypedString \
                       and UnstructuredJson into the same SQL string. \
                       Both source families weigh negatively. (D5.2 \
                       honest-review finding.)"
                    .to_string(),
            });
        }
        UserInputSource::UnstructuredJson => {
            sum += W_USER_INPUT_UNSTRUCTURED_JSON_NEARBY;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description:
                        "user input from unstructured-JSON source (request.json / request.body)"
                            .to_string(),
                },
                weight: W_USER_INPUT_UNSTRUCTURED_JSON_NEARBY,
                note: "User input from an unstructured-JSON source \
                       (`request.json`, `request.get_json()`, \
                       `request.body`). For SQL, same magnitude as \
                       TypedString because string interpolation \
                       flattens both. D5.2 honest-review finding."
                    .to_string(),
            });
        }
        UserInputSource::None => {}
    }

    if evidence.static_sql_string_literal {
        sum += W_STATIC_SQL_STRING_LITERAL;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "static SQL string literal (no interpolation)".to_string(),
            },
            weight: W_STATIC_SQL_STRING_LITERAL,
            // This is a plain string literal, not a `format!` template:
            // `%` needs no escaping, so it is written once.
            note: "The call's first argument is a string literal \
                   containing SQL keywords with no f-string / concat / \
                   format / % formatting markers. Strong Benign signal: \
                   the developer wrote a literal SQL string, often \
                   paired with parameterized placeholders."
                .to_string(),
        });
    }

    if evidence.sql_keyword_no_formatting {
        sum += W_SQL_KEYWORD_NO_FORMATTING;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "SQL keyword present, no formatting markers in argument".to_string(),
            },
            weight: W_SQL_KEYWORD_NO_FORMATTING,
            note: "The line contains a SQL keyword \
                   (SELECT/INSERT/UPDATE/DELETE) but the call's first \
                   argument shows no formatting markers. Soft Benign — \
                   typical literal-SQL-with-parameters pattern."
                .to_string(),
        });
    }

    if evidence.enclosing_route_handler {
        sum += W_ENCLOSING_ROUTE_HANDLER;
        if let Some(fn_name) = &evidence.enclosing_function {
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "route_handler".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_ROUTE_HANDLER,
                note: "Enclosing function is a route handler (decorator \
                       or naming convention); higher prior on attacker-\
                       reachable SQL code. Lighter weight than 2e–2h \
                       because the AST classification is doing most of \
                       the work — the handler prior is a thin tiebreaker."
                    .to_string(),
            });
        } else {
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "enclosing route handler context".to_string(),
                },
                weight: W_ENCLOSING_ROUTE_HANDLER,
                note: "Call site is in a route-handler context.".to_string(),
            });
        }
    }

    if evidence.trust_boundary_name {
        sum += W_TRUST_BOUNDARY_NAME;
        if let Some(fn_name) = &evidence.enclosing_function {
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "trust_boundary".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_TRUST_BOUNDARY_NAME,
                note: "Enclosing function name contains a trust-boundary \
                       keyword (_trusted/_admin/_internal/_validated/_signed) \
                       — developer-authored signal that data has been verified."
                    .to_string(),
            });
        } else {
            // The weight was applied above, so record a reason even when
            // the function name is unavailable; otherwise `reasons` would
            // not account for the full `sum`. Mirrors the route-handler
            // fallback.
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "enclosing trust-boundary context".to_string(),
                },
                weight: W_TRUST_BOUNDARY_NAME,
                note: "Call site is in a trust-boundary context.".to_string(),
            });
        }
    }

    if let Some(fn_name) = &evidence.enclosing_function {
        if matches_test_function(fn_name) {
            sum += W_ENCLOSING_TEST_FUNCTION;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "function".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_TEST_FUNCTION,
                note: format!(
                    "Enclosing function `{fn_name}` looks like a \
                     test/fixture; test code rarely the actionable \
                     security target."
                ),
            });
        }
    }

    // ── Step 3: tiebreak + severity mapping. ──
    // Only a strictly positive sum predicts Benign; zero and negative
    // sums both resolve to RealBug (conservative tiebreak).
    let predicted = if sum > 0.0 {
        BranchLabel::Benign
    } else {
        BranchLabel::RealBug
    };

    build_prediction(predicted, api, sum, reasons, Vec::new())
}

// ─────────────────────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────────────

/// True iff `name`, once lowercased, contains at least one entry of
/// `TEST_FUNCTION_SUBSTRINGS`. Only the name is case-folded; lexicon
/// entries are compared exactly as written.
fn matches_test_function(name: &str) -> bool {
    let folded = name.to_lowercase();
    for needle in TEST_FUNCTION_SUBSTRINGS.iter() {
        if folded.contains(needle) {
            return true;
        }
    }
    false
}

/// Reports whether a function name follows a route-handler naming
/// convention, i.e. its lowercased form contains any entry of
/// `ROUTE_HANDLER_NAME_SUBSTRINGS`. This covers the name only; the
/// evidence extractor pairs it with decorator detection.
pub(super) fn matches_route_handler_name(name: &str) -> bool {
    let folded = name.to_lowercase();
    for needle in ROUTE_HANDLER_NAME_SUBSTRINGS.iter() {
        if folded.contains(needle) {
            return true;
        }
    }
    false
}

/// Reports whether a function name carries a trust-boundary keyword:
/// the name is lowercased and searched for any entry of
/// `TRUST_BOUNDARY_NAME_SUBSTRINGS`.
pub(super) fn matches_trust_boundary_name(name: &str) -> bool {
    let folded = name.to_lowercase();
    for needle in TRUST_BOUNDARY_NAME_SUBSTRINGS.iter() {
        if folded.contains(needle) {
            return true;
        }
    }
    false
}

/// Reports whether `line` — expected to be the decorator line that
/// precedes a function definition — begins with one of the
/// route-handler decorator patterns. Surrounding whitespace is
/// stripped first, so indented decorators match; the comparison is a
/// case-sensitive prefix test.
pub(super) fn matches_route_handler_decorator(line: &str) -> bool {
    let candidate = line.trim();
    for prefix in ROUTE_HANDLER_DECORATOR_SUBSTRINGS.iter() {
        if candidate.starts_with(prefix) {
            return true;
        }
    }
    false
}

/// Determine which user-input source family, if any, a line mentions.
///
/// Matching is case-insensitive: both the line and each lexicon entry
/// are lowercased before the substring test. The unstructured-JSON
/// lexicon is consulted first, so it wins when a line mentions both
/// families. Yields `UserInputSource::None` when neither lexicon
/// matches.
pub(super) fn classify_user_input_source(line: &str) -> UserInputSource {
    let haystack = line.to_lowercase();

    if UNSTRUCTURED_JSON_USER_INPUT_SUBSTRINGS
        .iter()
        .any(|needle| haystack.contains(&needle.to_lowercase()))
    {
        UserInputSource::UnstructuredJson
    } else if TYPED_STRING_USER_INPUT_SUBSTRINGS
        .iter()
        .any(|needle| haystack.contains(&needle.to_lowercase()))
    {
        UserInputSource::TypedString
    } else {
        UserInputSource::None
    }
}

/// Reports whether the upper-cased line contains any entry of
/// `SQL_KEYWORDS` as a substring. This makes the check
/// case-insensitive with respect to the line.
// NOTE(review): assumes every `SQL_KEYWORDS` entry is already
// upper-case — confirm against the lexicon definition.
pub(super) fn line_contains_sql_keyword(line: &str) -> bool {
    let shouted = line.to_uppercase();
    for keyword in SQL_KEYWORDS.iter() {
        if shouted.contains(keyword) {
            return true;
        }
    }
    false
}

/// Reports whether `method` is listed verbatim in
/// `SAFE_DJANGO_ORM_METHODS`, the safe keyword-filter Django ORM
/// methods (`filter`, `get`, `create`, `update`, etc.). The match is
/// exact and case-sensitive.
pub(super) fn is_safe_django_orm_method(method: &str) -> bool {
    SAFE_DJANGO_ORM_METHODS
        .iter()
        .any(|known| *known == method)
}

/// Reports whether `method` is listed verbatim in
/// `UNSAFE_RAW_DJANGO_ORM_METHODS`, the raw-SQL escape-hatch Django
/// ORM methods (`raw`, `extra`). The match is exact and
/// case-sensitive.
pub(super) fn is_unsafe_raw_django_orm_method(method: &str) -> bool {
    UNSAFE_RAW_DJANGO_ORM_METHODS
        .iter()
        .any(|known| *known == method)
}

/// Reports whether the callee-method name is listed verbatim in
/// `SQL_SINK_METHODS`, the generic SQL sink methods (`execute`,
/// `executemany`, etc.). The match is exact and case-sensitive.
pub(super) fn is_sql_sink_method(method: &str) -> bool {
    SQL_SINK_METHODS.iter().any(|known| *known == method)
}

/// Reports whether `name` is listed verbatim in
/// `SQLALCHEMY_TEXT_FUNCTION_NAMES`, the SQLAlchemy `text()`-style
/// parameterized-SQL constructors. The match is exact and
/// case-sensitive.
pub(super) fn is_sqlalchemy_text_function(name: &str) -> bool {
    SQLALCHEMY_TEXT_FUNCTION_NAMES
        .iter()
        .any(|known| *known == name)
}

/// Build the prediction for a collapse path: exactly one reason and
/// exactly one resolution signal, with `forced_sum` standing in for
/// the weighted sum that drives the severity mapping.
fn collapse(
    label: BranchLabel,
    api: SqlApi,
    forced_sum: f32,
    resolution: ResolutionSignal,
    reason: PredictionReason,
) -> Prediction {
    let sole_reason = vec![reason];
    let sole_resolution = vec![resolution];
    build_prediction(label, api, forced_sum, sole_reason, sole_resolution)
}

/// Assemble the final [`Prediction`] for a call site.
///
/// The predicted branch keeps `reasons` and `resolutions` as given.
/// The opposite branch is materialized as the `alternative_branch`,
/// with its own title, description and suggested fix derived from the
/// API's callee label. The same `sum` feeds the severity mapping for
/// both branches.
fn build_prediction(
    predicted: BranchLabel,
    api: SqlApi,
    sum: f32,
    reasons: Vec<PredictionReason>,
    resolutions: Vec<ResolutionSignal>,
) -> Prediction {
    let callee = api.callee_label();
    let flipped = predicted.opposite();

    Prediction {
        predicted,
        alternative_branch: AlternativeBranch {
            label: flipped,
            severity: severity_for_branch(flipped, sum),
            title: title_for_branch(flipped, callee),
            description: description_for_branch(flipped, callee),
            suggested_fix: suggested_fix_for_branch(flipped, callee),
        },
        predicted_severity: severity_for_branch(predicted, sum),
        reasons,
        resolutions,
    }
}

/// D3 severity mapping.
///
/// Benign is always `Info`. RealBug is bucketed by the weighted sum
/// (collapse paths pass a forced sum):
///
/// - `sum <= -0.7` → Critical
/// - `-0.7 < sum <= -0.4` → High
/// - anything else → Medium. This covers `-0.4 < sum < 0.0`, the
///   `sum == 0.0` tiebreak, and positive sums, which occur when
///   RealBug is the alternative to a Benign prediction.
fn severity_for_branch(label: BranchLabel, sum: f32) -> Severity {
    match label {
        BranchLabel::Benign => Severity::Info,
        BranchLabel::RealBug if sum <= -0.7 => Severity::Critical,
        BranchLabel::RealBug if sum <= -0.4 => Severity::High,
        BranchLabel::RealBug => Severity::Medium,
    }
}

/// One-line finding title for the given branch, naming the callee.
fn title_for_branch(label: BranchLabel, api_label: &str) -> String {
    match label {
        BranchLabel::Benign => format!("Safe SQL call ({api_label}) — informational"),
        BranchLabel::RealBug => format!("Potential SQL injection via {api_label}"),
    }
}

/// Long-form finding description for the given branch. The RealBug
/// text explains the injection risk; the Benign text explains why the
/// call is carried as Info and where the RealBug reading is kept.
fn description_for_branch(label: BranchLabel, api_label: &str) -> String {
    match label {
        BranchLabel::Benign => format!(
            "The `{api_label}` call appears to use a parameterized SQL \
             API (string-literal + bound values), an ORM keyword-filter \
             expression (`Model.objects.filter(...)`), or a SQLAlchemy \
             `text() + bound-params dict` pair. The call is carried as \
             Info; the RealBug interpretation is preserved in \
             `alternative_branch` in case the predictor is wrong."
        ),
        BranchLabel::RealBug => format!(
            "The `{api_label}` call appears to construct a SQL query \
             via string interpolation (f-string / concat / format / %), \
             or invokes the Django `.raw()` escape hatch with user-\
             reachable input. SQL injection allows attackers to access, \
             modify, or delete database records and in some cases \
             execute operating-system commands."
        ),
    }
}

/// Remediation guidance for the given branch. Both branches currently
/// produce `Some(..)`; the callee label is accepted for signature
/// parity with the other `*_for_branch` helpers but is not used.
fn suggested_fix_for_branch(label: BranchLabel, _api_label: &str) -> Option<String> {
    let advice: &str = match label {
        BranchLabel::RealBug => {
            "Sanitize the SQL construction:\n\n\
             ```python\n\
             # Instead of:\n\
             cursor.execute(f\"SELECT * FROM users WHERE id = {request.form['id']}\")\n\
             User.objects.raw(\"SELECT * FROM users WHERE id = \" + request.GET['id'])\n\
             \n\
             # Use parameterized queries:\n\
             cursor.execute(\"SELECT * FROM users WHERE id = %s\", (request.form['id'],))\n\
             cursor.execute(\"SELECT * FROM users WHERE id = ?\", [request.form['id']])\n\
             \n\
             # Or the ORM:\n\
             User.objects.filter(id=request.GET['id'])\n\
             \n\
             # Or SQLAlchemy text() + bound params:\n\
             db.execute(text(\"SELECT * FROM users WHERE id = :id\"), {\"id\": user_id})\n\
             ```\n\n\
             If the call is intentionally constructing a complex query \
             that the predictor cannot trace (cross-statement assembly, \
             dynamic query builder, etc.), annotate the call site with \
             `# repotoire: sql-safe[<reason>]` to collapse the finding \
             to Info."
        }
        BranchLabel::Benign => {
            "If this is intentional safe usage, annotate \
             `# repotoire: sql-safe[<reason>]` to collapse the finding \
             to Info definitively. If the alternative branch is correct \
             (the call IS exposed to attacker-controlled SQL via a \
             path the predictor missed), audit the call's input source \
             classification and consider switching to a parameterized \
             API or ORM filter expression."
        }
    };
    Some(advice.to_string())
}

// ─────────────────────────────────────────────────────────────────────────────
// Annotation lookup helpers (called by evidence extraction)
// ─────────────────────────────────────────────────────────────────────────────

/// Pull the reason out of a `# repotoire: sql-safe[<reason>]`
/// annotation on `line`.
///
/// Yields `None` when the line has no parseable annotation or the
/// annotation is of a different kind. When the annotation carries no
/// argument the reason is `"unspecified"`; otherwise it is the first
/// argument.
pub(super) fn extract_sql_safe_reason(line: &str) -> Option<String> {
    parse_python_comment(line)
        .filter(|ann| ann.kind == "sql-safe")
        .map(|ann| match ann.args.first() {
            Some(reason) => reason.clone(),
            None => "unspecified".to_string(),
        })
}

/// Pull the source out of a `# repotoire: sql-vulnerable[<source>]`
/// annotation on `line`.
///
/// Yields `None` when the line has no parseable annotation or the
/// annotation is of a different kind. When the annotation carries no
/// argument the source is `"unspecified"`; otherwise it is the first
/// argument.
pub(super) fn extract_sql_vulnerable_source(line: &str) -> Option<String> {
    parse_python_comment(line)
        .filter(|ann| ann.kind == "sql-vulnerable")
        .map(|ann| match ann.args.first() {
            Some(source) => source.clone(),
            None => "unspecified".to_string(),
        })
}

// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────

// Unit tests for the predictor and its helpers.
//
// The `predict` tests build `Evidence` values by hand (struct-update
// syntax over `Default`), so they pin the decision logic only. The
// helper tests feed literal lines and names to the lexicon,
// classifier and annotation functions.
//
// Layout: worked examples A–H, collapse dominance, weighted scoring on
// `SqlApi::Ambiguous`, annotation overrides, the zero-sum tiebreak,
// weight sign conventions, `SqlApi` predicates, then the helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // ─── Worked example A (decisions §6): Parameterized psycopg2. ───
    #[test]
    fn case_a_parameterized_execute_predicts_benign_info() {
        let evidence = Evidence {
            api: Some(SqlApi::Safe),
            user_input_source: UserInputSource::TypedString,
            enclosing_route_handler: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        // A collapse is expected to carry exactly one resolution signal.
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::StructuralPattern { .. }
        ));
        assert_eq!(p.resolutions[0].collapses_to, BranchLabel::Benign);
    }

    // ─── Worked example B: f-string SQL. ───
    #[test]
    fn case_b_fstring_predicts_realbug_critical() {
        let evidence = Evidence {
            api: Some(SqlApi::Unsafe),
            user_input_source: UserInputSource::TypedString,
            enclosing_route_handler: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
        assert_eq!(p.resolutions.len(), 1);
        assert_eq!(p.resolutions[0].collapses_to, BranchLabel::RealBug);
    }

    // ─── Worked example C: type-cast-laundered .format() (v0 limitation). ───
    #[test]
    fn case_c_type_cast_laundered_format_overfires_realbug() {
        // Per D5.3: the AST sees `.format()` inside the SQL argument,
        // so the predictor collapses to RealBug Critical. The
        // developer's int() cast is invisible to the v0 predictor.
        // Evidence matches case B minus the route-handler flag.
        let evidence = Evidence {
            api: Some(SqlApi::Unsafe),
            user_input_source: UserInputSource::TypedString,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
    }

    // ─── Worked example D: Django ORM filter. ───
    #[test]
    fn case_d_django_orm_filter_predicts_benign_info() {
        // Evidence is field-for-field the same as case A: both call
        // shapes are represented as `SqlApi::Safe` here.
        let evidence = Evidence {
            api: Some(SqlApi::Safe),
            user_input_source: UserInputSource::TypedString,
            enclosing_route_handler: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    // ─── Worked example E (HEADLINE): Django .raw() with concat. ───
    #[test]
    fn case_e_django_raw_with_concat_predicts_realbug_critical() {
        // The Phase 2j headline distinction.
        let evidence = Evidence {
            api: Some(SqlApi::UnsafeRaw),
            user_input_source: UserInputSource::TypedString,
            enclosing_route_handler: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::StructuralPattern { .. }
        ));
    }

    // ─── Worked example F: static literal SQL. ───
    #[test]
    fn case_f_static_literal_sql_predicts_benign_info() {
        // Only the API shape is set; every other field stays at its default.
        let evidence = Evidence {
            api: Some(SqlApi::Safe),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    // ─── Worked example G: opaque variable with user input + handler. ───
    #[test]
    fn case_g_opaque_var_with_user_input_handler_predicts_realbug_high() {
        // -0.30 (TypedString) - 0.20 (handler) = -0.50 → High.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            user_input_source: UserInputSource::TypedString,
            enclosing_route_handler: true,
            enclosing_function: Some("get_user_endpoint".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::High);
    }

    // ─── Worked example G': opaque variable, no nearby input → Medium. ───
    #[test]
    fn case_g_prime_opaque_var_no_user_input_predicts_realbug_medium() {
        // No additive signals → sum = 0.0 → tiebreak RealBug Medium.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Medium);
    }

    // ─── Worked example H: SQLAlchemy text() + bound dict. ───
    #[test]
    fn case_h_sqlalchemy_text_with_binds_predicts_benign_info() {
        let evidence = Evidence {
            api: Some(SqlApi::Safe),
            user_input_source: UserInputSource::TypedString,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    // ─── D1.a Safe collapse dominates every other signal ───
    #[test]
    fn safe_collapse_dominates_handler_and_user_input() {
        // Both RealBug-leaning signals are set; Safe must still win.
        let evidence = Evidence {
            api: Some(SqlApi::Safe),
            user_input_source: UserInputSource::UnstructuredJson,
            enclosing_route_handler: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
    }

    // ─── D1.b Unsafe collapse dominates every other signal ───
    #[test]
    fn unsafe_collapse_dominates_test_function() {
        // A test-like enclosing function is a Benign-leaning signal;
        // the Unsafe collapse must override it.
        let evidence = Evidence {
            api: Some(SqlApi::Unsafe),
            enclosing_function: Some("test_query_builder".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
    }

    // ─── D1.c UnsafeRaw collapse dominates every other signal ───
    #[test]
    fn unsafe_raw_collapse_dominates_trust_boundary() {
        // The trust-boundary name is a Benign-leaning signal; the
        // UnsafeRaw collapse must override it.
        let evidence = Evidence {
            api: Some(SqlApi::UnsafeRaw),
            trust_boundary_name: true,
            enclosing_function: Some("admin_query_trusted".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
    }

    // ─── Ambiguous: weighted scoring fires ───
    #[test]
    fn ambiguous_with_unstructured_json_and_handler_predicts_realbug_high() {
        // -0.30 - 0.20 = -0.50 → High.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            user_input_source: UserInputSource::UnstructuredJson,
            enclosing_route_handler: true,
            enclosing_function: Some("api_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::High);
    }

    #[test]
    fn ambiguous_with_typed_string_only_predicts_realbug_medium() {
        // -0.30 → between -0.4 and 0.0 → Medium.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            user_input_source: UserInputSource::TypedString,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Medium);
    }

    #[test]
    fn ambiguous_test_function_predicts_benign() {
        // +0.15 (test) → Benign.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            enclosing_function: Some("test_user_query".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
    }

    #[test]
    fn ambiguous_static_sql_literal_predicts_benign() {
        // +0.40 → Benign.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            static_sql_string_literal: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
    }

    #[test]
    fn ambiguous_sql_keyword_no_formatting_predicts_benign() {
        // +0.10 → Benign.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            sql_keyword_no_formatting: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
    }

    #[test]
    fn ambiguous_trust_boundary_name_predicts_benign() {
        // +0.10 → Benign.
        let evidence = Evidence {
            api: Some(SqlApi::Ambiguous),
            trust_boundary_name: true,
            enclosing_function: Some("admin_query_trusted".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
    }

    // ─── Annotations ───
    // Each annotation is paired with the opposite API shape to show
    // that the annotation, not the structural collapse, decides.
    #[test]
    fn sql_safe_annotation_collapses_to_benign() {
        let evidence = Evidence {
            api: Some(SqlApi::Unsafe), // even Unsafe shape
            sql_safe_annotation: Some("type-cast-laundered".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::SourceAnnotation { .. }
        ));
    }

    #[test]
    fn sql_vulnerable_annotation_collapses_to_realbug() {
        let evidence = Evidence {
            api: Some(SqlApi::Safe), // even Safe shape
            sql_vulnerable_annotation: Some("dynamic-query-builder".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
    }

    // ─── Tiebreak ───
    #[test]
    fn empty_evidence_tiebreaks_realbug_medium() {
        // With no evidence the prediction falls to RealBug / Medium.
        let p = predict(&Evidence::empty());
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Medium);
    }

    // ─── Sign convention ───
    // Negative weights push toward RealBug, positive toward Benign.
    // The clippy allow is needed because the asserts compare
    // constants; they are deliberate pins against a sign flip.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn realbug_signal_weights_are_negative() {
        assert!(W_USER_INPUT_UNSTRUCTURED_JSON_NEARBY < 0.0);
        assert!(W_USER_INPUT_TYPED_STRING_NEARBY < 0.0);
        assert!(W_ENCLOSING_ROUTE_HANDLER < 0.0);
        assert!(W_API_UNSAFE_COLLAPSE < 0.0);
        assert!(W_API_UNSAFE_RAW_COLLAPSE < 0.0);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn benign_signal_weights_are_positive() {
        assert!(W_STATIC_SQL_STRING_LITERAL > 0.0);
        assert!(W_SQL_KEYWORD_NO_FORMATTING > 0.0);
        assert!(W_TRUST_BOUNDARY_NAME > 0.0);
        assert!(W_ENCLOSING_TEST_FUNCTION > 0.0);
        assert!(W_API_SAFE_COLLAPSE > 0.0);
    }

    // ─── Honest review pin: D5.2 source-family weight symmetry for SQL ───
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn d5_2_typed_string_and_unstructured_json_same_magnitude_for_sql() {
        // Unlike 2i (TypedString = +0.20, UnstructuredJson = -0.30),
        // for SQL both source families have the SAME (negative)
        // magnitude — f-string interpolation flattens Python type.
        // D5.2 honest-review finding.
        assert!(
            W_USER_INPUT_TYPED_STRING_NEARBY < 0.0,
            "TypedString must be negative for SQL (interpolation flattens type)"
        );
        assert!(
            W_USER_INPUT_UNSTRUCTURED_JSON_NEARBY < 0.0,
            "UnstructuredJson must be negative for SQL"
        );
        assert_eq!(
            W_USER_INPUT_TYPED_STRING_NEARBY.abs(),
            W_USER_INPUT_UNSTRUCTURED_JSON_NEARBY.abs(),
            "For SQL, both source families have the same magnitude (D5.2)"
        );
    }

    // ─── SqlApi helpers ───
    #[test]
    fn sql_api_collapses_predicates() {
        // Each collapsing variant answers true to exactly one
        // predicate; Ambiguous and Unknown answer true to none.
        assert!(SqlApi::Safe.collapses_safe());
        assert!(!SqlApi::Safe.collapses_unsafe());
        assert!(!SqlApi::Safe.collapses_unsafe_raw());

        assert!(SqlApi::Unsafe.collapses_unsafe());
        assert!(!SqlApi::Unsafe.collapses_safe());
        assert!(!SqlApi::Unsafe.collapses_unsafe_raw());

        assert!(SqlApi::UnsafeRaw.collapses_unsafe_raw());
        assert!(!SqlApi::UnsafeRaw.collapses_safe());
        assert!(!SqlApi::UnsafeRaw.collapses_unsafe());

        assert!(!SqlApi::Ambiguous.collapses_safe());
        assert!(!SqlApi::Ambiguous.collapses_unsafe());
        assert!(!SqlApi::Ambiguous.collapses_unsafe_raw());

        assert!(!SqlApi::Unknown.collapses_safe());
        assert!(!SqlApi::Unknown.collapses_unsafe());
        assert!(!SqlApi::Unknown.collapses_unsafe_raw());
    }

    #[test]
    fn sql_api_is_recognized() {
        // Only `Unknown` is unrecognized.
        assert!(SqlApi::Safe.is_recognized());
        assert!(SqlApi::Unsafe.is_recognized());
        assert!(SqlApi::UnsafeRaw.is_recognized());
        assert!(SqlApi::Ambiguous.is_recognized());
        assert!(!SqlApi::Unknown.is_recognized());
    }

    // ─── User-input-source classifier ───
    #[test]
    fn classify_request_form_as_typed_string() {
        assert_eq!(
            classify_user_input_source("u = request.form['user']"),
            UserInputSource::TypedString
        );
        assert_eq!(
            classify_user_input_source("uid = request.GET['id']"),
            UserInputSource::TypedString
        );
    }

    #[test]
    fn classify_request_json_as_unstructured_json() {
        assert_eq!(
            classify_user_input_source("p = request.json"),
            UserInputSource::UnstructuredJson
        );
        assert_eq!(
            classify_user_input_source("body = request.get_json()"),
            UserInputSource::UnstructuredJson
        );
    }

    #[test]
    fn classify_unstructured_json_priority_when_both_present() {
        // The line mentions both families; the JSON family must win.
        let src = "x = request.form['a'] or request.json";
        assert_eq!(
            classify_user_input_source(src),
            UserInputSource::UnstructuredJson
        );
    }

    #[test]
    fn classify_no_user_input() {
        assert_eq!(
            classify_user_input_source("x = compute()"),
            UserInputSource::None
        );
    }

    // ─── SQL keyword classifier ───
    #[test]
    fn line_contains_sql_keyword_recognizes_select() {
        // Covers upper-case, lower-case, and a keyword embedded in a
        // larger source line.
        assert!(line_contains_sql_keyword("SELECT * FROM users"));
        assert!(line_contains_sql_keyword("select * from users"));
        assert!(line_contains_sql_keyword("INSERT INTO log VALUES (1)"));
        assert!(line_contains_sql_keyword(
            "cursor.execute(\"DELETE FROM x\")"
        ));
    }

    #[test]
    fn line_contains_sql_keyword_no_match() {
        assert!(!line_contains_sql_keyword("x = compute(payload)"));
        assert!(!line_contains_sql_keyword("foo = bar"));
    }

    // ─── ORM method classifier ───
    #[test]
    fn safe_django_orm_method_recognition() {
        assert!(is_safe_django_orm_method("filter"));
        assert!(is_safe_django_orm_method("get"));
        assert!(is_safe_django_orm_method("create"));
        assert!(is_safe_django_orm_method("update"));
        // The raw-SQL escape hatches must not be classified as safe.
        assert!(!is_safe_django_orm_method("raw"));
        assert!(!is_safe_django_orm_method("extra"));
    }

    #[test]
    fn unsafe_raw_django_orm_method_recognition() {
        assert!(is_unsafe_raw_django_orm_method("raw"));
        assert!(is_unsafe_raw_django_orm_method("extra"));
        assert!(!is_unsafe_raw_django_orm_method("filter"));
        assert!(!is_unsafe_raw_django_orm_method("get"));
    }

    #[test]
    fn sql_sink_method_recognition() {
        assert!(is_sql_sink_method("execute"));
        assert!(is_sql_sink_method("executemany"));
        assert!(is_sql_sink_method("executescript"));
        assert!(!is_sql_sink_method("filter"));
        assert!(!is_sql_sink_method("compute"));
    }

    #[test]
    fn sqlalchemy_text_function_recognition() {
        assert!(is_sqlalchemy_text_function("text"));
        assert!(is_sqlalchemy_text_function("literal_column"));
        assert!(!is_sqlalchemy_text_function("execute"));
    }

    // ─── Lexicon helpers ───
    #[test]
    fn route_handler_decorator_matches() {
        assert!(matches_route_handler_decorator("@app.route('/foo')"));
        // Leading indentation must not prevent a match.
        assert!(matches_route_handler_decorator("    @app.post('/x')"));
        assert!(matches_route_handler_decorator("@router.get('/v1')"));
        assert!(!matches_route_handler_decorator("@dataclass"));
    }

    #[test]
    fn route_handler_name_matches() {
        assert!(matches_route_handler_name("login_handler"));
        assert!(matches_route_handler_name("user_endpoint"));
        assert!(!matches_route_handler_name("compute_total"));
    }

    #[test]
    fn trust_boundary_name_matches() {
        // Keywords are matched at the end and in the middle of a name.
        assert!(matches_trust_boundary_name("query_trusted"));
        assert!(matches_trust_boundary_name("load_admin_query"));
        assert!(matches_trust_boundary_name("post_validated_filter"));
        assert!(matches_trust_boundary_name("read_internal_state"));
        assert!(!matches_trust_boundary_name("plain_query"));
    }

    // ─── Extract helpers ───
    #[test]
    fn extract_sql_safe_with_reason() {
        assert_eq!(
            extract_sql_safe_reason(
                "cursor.execute(q)  # repotoire: sql-safe[whitelisted-table-name]"
            ),
            Some("whitelisted-table-name".to_string())
        );
    }

    #[test]
    fn extract_sql_safe_without_reason() {
        // No bracketed argument → the "unspecified" default.
        assert_eq!(
            extract_sql_safe_reason("cursor.execute(q)  # repotoire: sql-safe"),
            Some("unspecified".to_string())
        );
    }

    #[test]
    fn extract_sql_vulnerable_with_source() {
        assert_eq!(
            extract_sql_vulnerable_source(
                "cursor.execute(q)  # repotoire: sql-vulnerable[dynamic-query-builder]"
            ),
            Some("dynamic-query-builder".to_string())
        );
    }

    #[test]
    fn extract_sql_ignores_other_kinds() {
        // Annotations of another kind must not be picked up by the
        // SQL extractors, even when they carry an argument.
        assert_eq!(
            extract_sql_safe_reason("x  # repotoire: nosql-safe[ok]"),
            None
        );
        assert_eq!(
            extract_sql_vulnerable_source("x  # repotoire: deserialize-vulnerable[ok]"),
            None
        );
    }
}