repotoire 0.8.0

//! Dual-branch predictor for Python SSRF HTTP-client call sites.
//!
//! Implements decisions D1 (weights, with safe-by-construction
//! Advocate collapse) and D3 (severity) from
//! `docs/superpowers/specs/2026-05-09-dual-branch-phase2-ssrf-decisions.md`.
//!
//! # What this module does
//!
//! Given a Python HTTP-client call site (`requests.get(...)`,
//! `urlopen(...)`, `httpx.get(...)`, `aiohttp.ClientSession.get(...)`,
//! `advocate.Session().get(...)`), produce a [`Prediction`] that:
//!
//! 1. Picks `RealBug` or `Benign` as the predicted branch.
//! 2. Carries the other branch as the alternative.
//! 3. Lists typed [`PredictionReason`]s the predictor used.
//! 4. Optionally lists [`ResolutionSignal`]s (collapsing or hint-grade).
//!
//! # The safe-by-construction collapse (D1 amendment)
//!
//! Like 2e's defusedxml, Advocate is a transport-layer enforcement
//! library that makes the URL's origin irrelevant. The predictor
//! treats any call on `advocate.*` as a **collapsing signal** —
//! same family as the `ssrf-safe` annotation. See the predict
//! function's Step 1.5 and the decisions-doc D1 amendment for the
//! full argument.
//!
//! # Sign convention
//!
//! `weight > 0` leans **Benign**; `weight < 0` leans **RealBug**.
//!
//! # Severity mapping (D3)
//!
//! - Predicted **RealBug** → `Severity::Critical` if user input flows
//!   to the URL, else `Severity::High`. This preserves the existing
//!   single-branch calibration at the old `ssrf.rs:178-205`.
//! - Predicted **Benign** → `Severity::Info`.
//! - Alternative branch carries the opposite label's severity.
//!
//! # Resolution signals (collapsing)
//!
//! Two annotations fully collapse the prediction:
//!
//! - `# repotoire: ssrf-safe[<reason>]` → `Benign` (Info).
//! - `# repotoire: ssrf-vulnerable[<source>]` → `RealBug` (severity from
//!   the existing High/Critical logic).
//!
//! # Why these weights
//!
//! See decision **D1** (with honest-review notes on the +0.30 Advocate
//! weight choice and the allowlist non-collapse). Numbers tagged
//! `TUNABLE`. Phase 3 misprediction logging is the right place to
//! retune.

use super::annotation::parse_python_comment;
use crate::dual_branch::{
    AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind, ResolutionKind,
    ResolutionSignal,
};
use crate::models::Severity;

// ─────────────────────────────────────────────────────────────────────────────
// HttpApi — the HTTP-client-API enum for SSRF
// ─────────────────────────────────────────────────────────────────────────────

/// The Python HTTP-client family a call site belongs to.
///
/// Separates the one safe-by-default family (`advocate.*`) from the
/// unsafe-by-default families (`requests.*`, `urllib`, `httpx.*`,
/// `aiohttp.*`) and from clients that were recognized but could not
/// be classified.
///
/// Note: unlike `CommandApi`, this enum does NOT carry a `severity_for`
/// 2D table. SSRF severity is one-dimensional: Critical if user input
/// flows to the URL, else High (per old `ssrf.rs:178-205`). Branch
/// severity is computed by `severity_for_branch` below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum HttpApi {
    /// `advocate.Session().get(...)`, `advocate.get(...)`, etc.
    /// Safe-by-construction: Advocate enforces the SSRF gate at the
    /// transport layer.
    Advocate,
    /// `requests.get`, `requests.post`, `requests.Session`, etc. The
    /// dominant Python HTTP client; not safe-by-default for SSRF.
    Requests,
    /// `urllib.request.urlopen`, `urllib.request.Request`, `urlopen`,
    /// `urllib2.urlopen` (legacy py2). Stdlib HTTP client, not safe-
    /// by-default.
    Urllib,
    /// `httpx.get`, `httpx.AsyncClient.get`, etc. Modern async HTTP
    /// client; not safe-by-default for SSRF.
    Httpx,
    /// `aiohttp.ClientSession.get`, `aiohttp.request`, etc. Async
    /// HTTP client; not safe-by-default for SSRF.
    Aiohttp,
    /// Some other HTTP client we recognized but can't classify (e.g.
    /// `urllib3` direct usage, `pycurl`). No special signal
    /// contribution beyond the base detection.
    Unknown,
}

impl HttpApi {
    /// Short display name of the client family, interpolated into
    /// finding titles, descriptions and annotation examples.
    pub(super) fn callee_label(self) -> &'static str {
        match self {
            Self::Unknown => "HTTP client",
            Self::Aiohttp => "aiohttp",
            Self::Httpx => "httpx",
            Self::Urllib => "urllib",
            Self::Requests => "requests",
            Self::Advocate => "advocate",
        }
    }

    /// Whether this is one of the recognized Python HTTP-client
    /// families. Gates the Phase 2f dual-branch emission path: only
    /// Python sites get the predictor-aware shape; JS/Java/etc. still
    /// go through the legacy regex scanner per decisions D5.
    ///
    /// Every variant is listed explicitly so that adding a new one
    /// forces a decision here.
    pub(super) fn is_python(self) -> bool {
        match self {
            Self::Advocate
            | Self::Requests
            | Self::Urllib
            | Self::Httpx
            | Self::Aiohttp => true,
            Self::Unknown => false,
        }
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Tunable weights
// ─────────────────────────────────────────────────────────────────────────────

// TUNABLE: see Phase 3 misprediction logging.
//
// Sign convention: positive leans Benign, negative leans RealBug.
//
// Calibration target (per decisions doc D1 worked examples):
//   * `from advocate import Session; s = Session(); s.get(req.body['url'])`
//     in a handler: Step 1.5 collapse → Benign / Info.
//   * `requests.get(req.body['url'])` in a Flask handler:
//     -0.50 + -0.30 = -0.80 → strongly RealBug.
//   * `is_safe_url(url); requests.get(url)` in a handler with user
//     input: +0.40 - 0.50 - 0.30 = -0.40 → still RealBug (conservative
//     v0 default — see D5 #2).
//   * Unused `import advocate` + naked `requests.get` in handler reading
//     req.body: +0.30 - 0.50 - 0.30 = -0.50 → correctly RealBug.

/// `advocate` module imported anywhere in the file. Weak Benign
/// signal: import is *capable* of being the safe pattern but doesn't
/// rule out a naked `requests.get` in the same file (D5 #1 v0
/// limitation). Mirrors 2e's `W_IMPORT_DEFUSEDXML` calibration.
///
/// Added to the score when `Evidence::import_advocate` is set.
pub(super) const W_IMPORT_ADVOCATE: f32 = 0.30;

/// `defusedurl` / `safe_url_check` / similar safe-URL wrappers
/// imported. Same calibration as Advocate import.
///
/// Added to the score when `Evidence::import_defusedurl` is set.
pub(super) const W_IMPORT_DEFUSEDURL: f32 = 0.30;

/// `validators` library imported (`validators.url(...)` is a syntactic
/// URL validator, not an SSRF gate per se). Weaker signal than the
/// transport-level wrappers.
///
/// Added to the score when `Evidence::import_validators` is set.
pub(super) const W_IMPORT_VALIDATORS: f32 = 0.10;

/// An explicit allowlist/validator call appears within 10 lines
/// above the HTTP call: `is_safe_url(url)`, `validate_url(url)`,
/// `check_url(url)`, `validators.url(url, public=False)`. Trust
/// *presence* not correctness (D5 #2); strongest additive Benign
/// signal short of a collapsing wrapper.
///
/// Added to the score when `Evidence::has_allowlist_call` is set.
pub(super) const W_ALLOWLIST_CALL: f32 = 0.40;

/// A scheme/hostname allowlist check: `parsed.scheme in {...}`
/// and/or `parsed.hostname in ALLOWED_HOSTS`. Looser than a single
/// allowlist callable.
///
/// Added to the score when `Evidence::has_scheme_hostname_allowlist`
/// is set.
pub(super) const W_SCHEME_HOSTNAME_ALLOWLIST: f32 = 0.30;

/// A private-IP guard via `ipaddress.ip_address(...).is_private`
/// (or `.is_loopback`, `.is_link_local`, `.is_reserved`,
/// `.is_multicast`) followed by a raise/return. Closes the
/// metadata-endpoint / internal-host leg specifically.
///
/// Added to the score when `Evidence::has_private_ip_guard` is set.
pub(super) const W_PRIVATE_IP_GUARD: f32 = 0.30;

/// Enclosing function looks like a test fixture.
///
/// `predict` checks the test-name pattern before the handler-name
/// pattern, so a function name matching both receives only this
/// weight.
pub(super) const W_ENCLOSING_TEST_FUNCTION: f32 = 0.15;

/// User input (request body / query / params / form) flows to the
/// HTTP call within a 10-line lookback window (carry-over of the
/// existing `has_user_input` heuristic at old `ssrf.rs:163` — but
/// narrowed from the legacy 20-line window to match 2e).
///
/// Added to the score when `Evidence::has_user_input_flow` is set.
/// The same flag also selects Critical over High for the RealBug
/// severity.
pub(super) const W_USER_INPUT_FLOW: f32 = -0.50;

/// Enclosing function looks like a request handler.
///
/// Only applied when the function name did not already match the
/// test-name pattern (see `W_ENCLOSING_TEST_FUNCTION`).
pub(super) const W_ENCLOSING_HANDLER: f32 = -0.30;

/// The URL argument is constructed via f-string or concatenation with
/// user input (e.g. `f"http://{user_host}/api"`, `"http://" +
/// req.body.host`). Strictly stronger than just "user input flows in"
/// because the developer can't even rely on `urlparse` to normalize.
///
/// NOTE(review): `predict` applies this whenever
/// `Evidence::url_fstring_or_concat` is set; whether the extractor
/// requires user input inside the interpolation is not visible from
/// this module — confirm against the extractor.
pub(super) const W_URL_FSTRING_CONCAT: f32 = -0.20;

/// The HTTP client call IS on `advocate.*` (i.e. the call site is
/// literally `advocate.Session().get(...)` or `s.get(...)` where `s`
/// is an Advocate session). Symmetric counterpart to 2e's
/// `W_API_DEFUSEDXML_CALL`: stronger than the file-scoped
/// `W_IMPORT_ADVOCATE` because it pins the actual call. This is the
/// safe-by-construction signal that D1's amendment is built around.
///
/// Honest review note (2026-05-11): mirrors the 2e amendment.
/// Without this signal, `s.get(req.body['url'])` in a handler scores
/// `+0.30 (import) - 0.50 (input) - 0.30 (handler) = -0.50` (RealBug),
/// defeating the point of using Advocate.
///
/// Treatment: rather than a +0.50 additive weight (which still loses
/// to handler + user-input combined at -0.80), we treat an Advocate
/// call as **collapsing to Benign**, same family as the `ssrf-safe`
/// annotation. Advocate IS the safe-by-construction answer; the URL
/// origin doesn't matter once Advocate is enforcing the gate at the
/// transport layer. The reason weight on the emitted PredictionReason
/// is +1.0 to mirror an annotation collapse.
///
/// This constant is never added to the running sum: `predict` only
/// attaches it to the `PredictionReason` emitted by the Step 1.5
/// collapse.
pub(super) const W_API_ADVOCATE_CALL: f32 = 1.0;

// ─────────────────────────────────────────────────────────────────────────────
// Lexicons used by source-classification helpers
// ─────────────────────────────────────────────────────────────────────────────

/// Substrings that identify user-input flow into URL construction.
/// Mirrors the existing `has_user_input` checks at old `ssrf.rs:163`
/// minus the regex-specific anchoring.
///
/// Entries are lowercase: `matches_user_input` lowercases the text it
/// searches but compares against these entries as written.
const USER_INPUT_SUBSTRINGS: &[&str] = &[
    "req.body",
    "req.query",
    "req.params",
    "req.form",
    "request.body",
    "request.query",
    "request.params",
    "request.json",
    "request.args",
    "request.form",
    "request.values",
    "ctx.params",
    "ctx.query",
    "ctx.request",
];

/// Substrings that identify test code. Mirrors 2d/2e.
///
/// Matched by `matches_test_function` against the lowercased name of
/// the enclosing function.
const TEST_FUNCTION_SUBSTRINGS: &[&str] = &["test_", "_test", "fixture", "setup", "teardown"];

/// Substrings that identify a request-handler function. Mirrors 2d/2e
/// minus XXE-specific tokens. SSRF-relevant tokens added: `proxy`,
/// `fetch`, `download`, `webhook`, `callback`.
///
/// NOTE(review): the list also contains `import`, which the token
/// rationale above does not mention — confirm it is intentional
/// (e.g. for `import_from_url`-style function names).
///
/// Matched by `matches_handler_function` against the lowercased name
/// of the enclosing function.
const HANDLER_FUNCTION_SUBSTRINGS: &[&str] = &[
    "handler",
    "route",
    "endpoint",
    "view",
    "controller",
    "middleware",
    "request",
    "response",
    "proxy",
    "fetch",
    "download",
    "webhook",
    "callback",
    "import",
];

/// Substrings that identify an allowlist / validator callable.
/// Conservative: must appear as a call (open paren after) within the
/// lookback window. The open-paren check is enforced by
/// `matches_allowlist_call`, which also skips over non-call
/// occurrences (such as an `import` line) and keeps searching.
const ALLOWLIST_CALL_SUBSTRINGS: &[&str] = &[
    "is_safe_url",
    "validate_url",
    "check_url",
    "verify_url",
    "is_allowed_url",
    "ensure_safe_url",
    "validators.url",
    "url_allowed",
    "is_url_allowed",
];

/// Substrings that identify a scheme/hostname allowlist check.
///
/// `matches_scheme_hostname_allowlist` lowercases both the searched
/// text and each entry, so the upper-case and lower-case spellings
/// below match exactly the same inputs.
const SCHEME_HOSTNAME_ALLOWLIST_SUBSTRINGS: &[&str] = &[
    ".scheme in",
    ".hostname in",
    ".host in",
    ".netloc in",
    "ALLOWED_HOSTS",
    "ALLOWED_URLS",
    "ALLOWLIST",
    "allowed_hosts",
    "allowed_urls",
    "allowlist",
];

/// Substrings that identify a private-IP guard via `ipaddress`.
///
/// `matches_private_ip_guard` fires when any single entry is present,
/// so a bare `ip_address(` / `ip_network(` call counts even when no
/// `.is_*` property is checked.
const PRIVATE_IP_GUARD_SUBSTRINGS: &[&str] = &[
    "ip_address(",
    "ip_network(",
    "is_private",
    "is_loopback",
    "is_link_local",
    "is_reserved",
    "is_multicast",
    "is_unspecified",
];

// ─────────────────────────────────────────────────────────────────────────────
// Evidence
// ─────────────────────────────────────────────────────────────────────────────

/// Structured evidence extracted from an HTTP-client call site.
///
/// This is the sole input to `predict`. The derived `Default` yields
/// "no evidence": every flag `false` and every optional field `None`;
/// `predict` treats a missing `api` as `HttpApi::Unknown`.
#[derive(Debug, Clone, Default, PartialEq)]
pub(super) struct Evidence {
    /// Which HTTP-client API the call site uses. `HttpApi::Advocate`
    /// triggers the Step 1.5 collapse in `predict`.
    pub api: Option<HttpApi>,

    /// Name of the enclosing function, if any. Checked against the
    /// test-function and handler-function lexicons by `predict`.
    pub enclosing_function: Option<String>,

    /// Name of the enclosing class, if any (informational; no weight).
    pub enclosing_class: Option<String>,

    /// `advocate` imported anywhere in the file. File-scoped per the
    /// D5 #1 v0 limitation.
    pub import_advocate: bool,

    /// `defusedurl` / `safe_url_check` / similar safe-URL wrappers
    /// imported anywhere in the file.
    pub import_defusedurl: bool,

    /// `validators` library imported.
    pub import_validators: bool,

    /// An explicit allowlist/validator call (`is_safe_url`,
    /// `validate_url`, etc.) appears in the 10-line lookback window.
    pub has_allowlist_call: bool,

    /// A scheme/hostname allowlist check (`parsed.hostname in
    /// ALLOWED_HOSTS`, etc.) appears in the 10-line lookback window.
    pub has_scheme_hostname_allowlist: bool,

    /// A private-IP guard via `ipaddress` (`.is_private`,
    /// `.is_loopback`, etc.) appears in the 10-line lookback window.
    pub has_private_ip_guard: bool,

    /// User input (request body / query / params) flows into the URL
    /// argument within a 10-line lookback window. Besides its score
    /// weight, this flag selects Critical over High for the RealBug
    /// branch severity.
    pub has_user_input_flow: bool,

    /// The URL argument is constructed via f-string or concatenation
    /// (e.g. `f"http://{x}/y"`, `"http://" + x`).
    pub url_fstring_or_concat: bool,

    /// `Some(reason)` if a `# repotoire: ssrf-safe[<reason>]`
    /// annotation appears on the call line. **Collapsing**. Checked
    /// before `ssrf_vulnerable_annotation`, so it wins if both are set.
    pub ssrf_safe_annotation: Option<String>,

    /// `Some(source)` if a `# repotoire: ssrf-vulnerable[<source>]`
    /// annotation appears on the call line. **Collapsing**.
    pub ssrf_vulnerable_annotation: Option<String>,
}

impl Evidence {
    /// Test-only convenience constructor: evidence with no signals set
    /// (identical to `Evidence::default()`).
    #[cfg(test)]
    pub(super) fn empty() -> Self {
        Self::default()
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Prediction
// ─────────────────────────────────────────────────────────────────────────────

/// Result of scoring one HTTP-client call site; produced by `predict`.
#[derive(Debug, Clone)]
pub(super) struct Prediction {
    /// The branch the predictor committed to.
    pub predicted: BranchLabel,
    /// The opposite branch, carried with its own severity, title,
    /// description and suggested fix.
    pub alternative_branch: AlternativeBranch,
    /// Severity of the predicted branch (D3 mapping: RealBug is
    /// Critical with user-input flow, else High; Benign is Info).
    pub predicted_severity: Severity,
    /// The signals that contributed to the verdict. On a collapse this
    /// holds exactly the single collapsing reason.
    pub reasons: Vec<PredictionReason>,
    /// Resolution signals. Holds the one collapsing signal when the
    /// prediction was collapsed; empty on the weighted-scoring path.
    pub resolutions: Vec<ResolutionSignal>,
}

// ─────────────────────────────────────────────────────────────────────────────
// Scorer
// ─────────────────────────────────────────────────────────────────────────────

/// Build a [`Prediction`] from extracted [`Evidence`].
///
/// # Algorithm
///
/// 1. **Collapsing signals first.** If `ssrf_safe_annotation` or
///    `ssrf_vulnerable_annotation` is set, commit to the corresponding
///    branch with confidence 1.0 and skip weighted scoring. The
///    `ssrf-safe` annotation is checked first.
/// 2. **Step 1.5 Advocate collapse.** If the API resolved to
///    `HttpApi::Advocate`, commit to Benign regardless of downstream
///    signals (D1 amendment).
/// 3. **Weighted scoring.** Sum weights for each present signal.
/// 4. **Tiebreak**: a sum of zero → predict RealBug. Conservative
///    default for security findings. "Zero" is judged with a small
///    tolerance because the `f32` weights do not cancel exactly (for
///    example `+0.30 + 0.40 - 0.50 - 0.20` accumulates to about
///    `4.5e-8`, not `0.0`).
///
/// # Severity mapping
///
/// - Predicted RealBug → `Critical` if `has_user_input_flow`, else `High`.
/// - Predicted Benign → `Severity::Info`.
pub(super) fn predict(evidence: &Evidence) -> Prediction {
    // Sums at or below this value count as a tie. Every weight is a
    // multiple of 0.05, so the smallest genuine Benign lean (0.05) is
    // far above the tolerance, while accumulated f32 rounding error
    // (order 1e-7) is far below it.
    const TIE_EPSILON: f32 = 1e-4;

    let api = evidence.api.unwrap_or(HttpApi::Unknown);
    let api_label = api.callee_label();

    // ── Step 1: collapsing annotations. ──
    if let Some(reason) = &evidence.ssrf_safe_annotation {
        return collapse(
            BranchLabel::Benign,
            api,
            evidence.has_user_input_flow,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: ssrf-safe[{reason}]"),
                },
                description: format!(
                    "`ssrf-safe[{reason}]` annotation declares this HTTP \
                     call as safe (caller-side allowlist, CDN-gateway \
                     validation, etc.); the finding collapses to Info."
                ),
                example: Some(format!(
                    "{api_label}(...)  # repotoire: ssrf-safe[{reason}]"
                )),
                collapses_to: BranchLabel::Benign,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("ssrf-safe[{reason}] annotation"),
                },
                weight: 1.0,
                note: format!(
                    "Annotated as caller-validated ({reason}); not an \
                     SSRF risk."
                ),
            },
        );
    }
    if let Some(source) = &evidence.ssrf_vulnerable_annotation {
        return collapse(
            BranchLabel::RealBug,
            api,
            evidence.has_user_input_flow,
            ResolutionSignal {
                kind: ResolutionKind::SourceAnnotation {
                    syntax: format!("# repotoire: ssrf-vulnerable[{source}]"),
                },
                description: format!(
                    "`ssrf-vulnerable[{source}]` annotation declares this \
                     HTTP call as exposed to attacker-controlled URLs; \
                     the finding stays at the existing severity."
                ),
                example: Some(format!(
                    "{api_label}(...)  # repotoire: ssrf-vulnerable[{source}]"
                )),
                collapses_to: BranchLabel::RealBug,
            },
            PredictionReason {
                kind: PredictionReasonKind::Custom {
                    description: format!("ssrf-vulnerable[{source}] annotation"),
                },
                weight: -1.0,
                note: format!("Annotated as SSRF-exposed (source: {source})."),
            },
        );
    }

    // ── Step 1.5: collapsing on Advocate call. ──
    //
    // A call site on an `advocate.*` API is safe-by-construction:
    // Advocate enforces the SSRF gate at the transport layer, so the
    // URL the user supplies is irrelevant. We treat this as a
    // collapsing signal in the same family as the `ssrf-safe`
    // annotation (decisions D1 amendment, 2026-05-11) rather than an
    // additive weight, because handler + user-input combined (-0.80)
    // would otherwise dominate even a +0.50 additive signal.
    if matches!(api, HttpApi::Advocate) {
        return collapse(
            BranchLabel::Benign,
            api,
            evidence.has_user_input_flow,
            ResolutionSignal {
                kind: ResolutionKind::StructuralPattern {
                    description: "call on advocate API (safe-by-construction)".to_string(),
                },
                description: "`advocate.*` HTTP clients are safe-by-construction against SSRF: \
                     the Advocate wrapper enforces the IP allowlist / private-IP block \
                     at the request transport layer. The URL the user supplies is \
                     irrelevant once Advocate is in the path."
                    .to_string(),
                example: Some(format!("{api_label}(...)")),
                collapses_to: BranchLabel::Benign,
            },
            PredictionReason {
                kind: PredictionReasonKind::StructuralPattern {
                    description: "HTTP call uses advocate (safe-by-construction)".to_string(),
                },
                weight: W_API_ADVOCATE_CALL,
                note: "The call site is on an `advocate.*` API. Advocate \
                       blocks private IPs / metadata endpoints / loopback \
                       at the transport layer, so user-controlled URLs \
                       cannot mount an SSRF attack. Strongest Benign \
                       signal in the v0 model."
                    .to_string(),
            },
        );
    }

    // ── Step 2: weighted scoring. ──
    let mut sum: f32 = 0.0;
    let mut reasons: Vec<PredictionReason> = Vec::new();

    if evidence.import_advocate {
        sum += W_IMPORT_ADVOCATE;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::ImportPresence {
                module: "advocate".to_string(),
            },
            weight: W_IMPORT_ADVOCATE,
            note: "`advocate` is the de-facto SSRF-safe HTTP client for \
                   Python. The import is weak signal (file-scoped, not \
                   call-scoped per v0); a naked `requests.get` in the \
                   same file can still flip the verdict to RealBug."
                .to_string(),
        });
    }

    if evidence.import_defusedurl {
        sum += W_IMPORT_DEFUSEDURL;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::ImportPresence {
                module: "defusedurl".to_string(),
            },
            weight: W_IMPORT_DEFUSEDURL,
            note: "`defusedurl` / `safe_url_check` provides URL validation \
                   for SSRF mitigation. Weak signal on its own; pairs \
                   with the validator-call signals."
                .to_string(),
        });
    }

    if evidence.import_validators {
        sum += W_IMPORT_VALIDATORS;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::ImportPresence {
                module: "validators".to_string(),
            },
            weight: W_IMPORT_VALIDATORS,
            note: "`validators` exposes `validators.url(...)` — a syntactic \
                   URL validator. Weakest of the import signals; pairs \
                   with an actual call to `validators.url(...)` upstream."
                .to_string(),
        });
    }

    if evidence.has_allowlist_call {
        sum += W_ALLOWLIST_CALL;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "URL passes through allowlist validator before request".to_string(),
            },
            weight: W_ALLOWLIST_CALL,
            note: "A developer-authored allowlist callable \
                   (`is_safe_url`, `validate_url`, `validators.url`, etc.) \
                   appears in the 10-line lookback window. v0 trusts \
                   *presence* not *correctness* — if the allowlist's \
                   body is misconfigured this signal still fires."
                .to_string(),
        });
    }

    if evidence.has_scheme_hostname_allowlist {
        sum += W_SCHEME_HOSTNAME_ALLOWLIST;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "scheme/hostname allowlist check before request".to_string(),
            },
            weight: W_SCHEME_HOSTNAME_ALLOWLIST,
            note: "A scheme or hostname allowlist check \
                   (`parsed.scheme in {...}`, `parsed.hostname in \
                   ALLOWED_HOSTS`) appears in the 10-line lookback \
                   window. Same trust-presence-not-correctness caveat."
                .to_string(),
        });
    }

    if evidence.has_private_ip_guard {
        sum += W_PRIVATE_IP_GUARD;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "private-IP guard via ipaddress before request".to_string(),
            },
            weight: W_PRIVATE_IP_GUARD,
            note: "An `ipaddress.ip_address(host).is_private` (or \
                   `.is_loopback`, `.is_link_local`, etc.) guard appears \
                   in the 10-line lookback window. Closes the metadata-\
                   endpoint / internal-host leg specifically."
                .to_string(),
        });
    }

    if evidence.has_user_input_flow {
        sum += W_USER_INPUT_FLOW;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "user input flows to URL within 10 lines".to_string(),
            },
            weight: W_USER_INPUT_FLOW,
            note: "Request body / query / params / form data flows into \
                   the URL argument within a 10-line lookback window. \
                   Attacker controls the URL being requested."
                .to_string(),
        });
    }

    if evidence.url_fstring_or_concat {
        sum += W_URL_FSTRING_CONCAT;
        reasons.push(PredictionReason {
            kind: PredictionReasonKind::StructuralPattern {
                description: "URL constructed from f-string/concat with user input".to_string(),
            },
            weight: W_URL_FSTRING_CONCAT,
            note: "The URL argument is built via f-string or string \
                   concatenation. The developer cannot rely on \
                   `urlparse` to normalize the scheme/host because the \
                   URL is assembled raw."
                .to_string(),
        });
    }

    // Note: an `Advocate` call is handled by the Step 1.5 collapse
    // above and never reaches this point. Other APIs (Requests,
    // Urllib, Httpx, Aiohttp, Unknown) fall through to additive
    // scoring without contributing an API-specific weight.

    // The test pattern is checked first: a name matching both the test
    // and handler lexicons only receives the test weight.
    if let Some(fn_name) = &evidence.enclosing_function {
        if matches_test_function(fn_name) {
            sum += W_ENCLOSING_TEST_FUNCTION;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "function".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_TEST_FUNCTION,
                note: format!(
                    "Enclosing function `{fn_name}` looks like a \
                     test/fixture; test code rarely the actionable \
                     security target."
                ),
            });
        } else if matches_handler_function(fn_name) {
            sum += W_ENCLOSING_HANDLER;
            reasons.push(PredictionReason {
                kind: PredictionReasonKind::EnclosingScope {
                    scope_kind: "request_handler".to_string(),
                    name: fn_name.clone(),
                },
                weight: W_ENCLOSING_HANDLER,
                note: format!(
                    "Enclosing function `{fn_name}` looks like a request \
                     handler (`handler`/`route`/`endpoint`/`view`/\
                     `controller`/`proxy`/`fetch`/`webhook`/`callback`); \
                     higher prior on attacker-reachable code."
                ),
            });
        }
    }

    // ── Step 3: tiebreak + severity mapping. ──
    //
    // Compare against `TIE_EPSILON` rather than a bare 0.0: weights
    // that cancel on paper can leave a tiny positive f32 residue,
    // which would otherwise flip a documented tie to Benign.
    let predicted = if sum > TIE_EPSILON {
        BranchLabel::Benign
    } else {
        // Tie (or negative sum): lean RealBug. Conservative default
        // matching Phase 2a/2b/2c/2d/2e behavior for security findings.
        BranchLabel::RealBug
    };

    build_prediction(
        predicted,
        api,
        evidence.has_user_input_flow,
        reasons,
        Vec::new(),
    )
}

// ─────────────────────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────────────

/// True iff the lowercased `text` contains any entry of
/// `USER_INPUT_SUBSTRINGS` (request body / query / params accessors).
pub(super) fn matches_user_input(text: &str) -> bool {
    let haystack = text.to_lowercase();
    for marker in USER_INPUT_SUBSTRINGS {
        if haystack.contains(*marker) {
            return true;
        }
    }
    false
}

/// True iff `text` contains one of the `ALLOWLIST_CALL_SUBSTRINGS`
/// names used as a call, i.e. followed (after optional whitespace) by
/// an opening parenthesis. Matching is case-insensitive.
///
/// Every occurrence of a name is examined, not just the first: the
/// same identifier often shows up on an `import` line before it is
/// actually called further down.
pub(super) fn matches_allowlist_call(text: &str) -> bool {
    let haystack = text.to_lowercase();
    ALLOWLIST_CALL_SUBSTRINGS.iter().any(|name| {
        let needle = name.to_lowercase();
        // `match_indices` walks the non-overlapping occurrences from
        // left to right; a hit counts only if a `(` follows it.
        haystack.match_indices(needle.as_str()).any(|(start, hit)| {
            let rest = &haystack[start + hit.len()..];
            rest.trim_start().starts_with('(')
        })
    })
}

/// True iff `text` contains any entry of
/// `SCHEME_HOSTNAME_ALLOWLIST_SUBSTRINGS`. Both the text and each
/// entry are lowercased before comparison.
pub(super) fn matches_scheme_hostname_allowlist(text: &str) -> bool {
    let haystack = text.to_lowercase();
    for marker in SCHEME_HOSTNAME_ALLOWLIST_SUBSTRINGS {
        let needle = marker.to_lowercase();
        if haystack.contains(needle.as_str()) {
            return true;
        }
    }
    false
}

/// True iff the lowercased `text` contains any entry of
/// `PRIVATE_IP_GUARD_SUBSTRINGS`; a single hit is enough.
pub(super) fn matches_private_ip_guard(text: &str) -> bool {
    let haystack = text.to_lowercase();
    PRIVATE_IP_GUARD_SUBSTRINGS
        .iter()
        .copied()
        .any(|marker| haystack.contains(marker))
}

/// True iff the lowercased function `name` contains any entry of
/// `TEST_FUNCTION_SUBSTRINGS`.
fn matches_test_function(name: &str) -> bool {
    let lowered = name.to_lowercase();
    for marker in TEST_FUNCTION_SUBSTRINGS {
        if lowered.contains(*marker) {
            return true;
        }
    }
    false
}

/// True iff the lowercased function `name` contains any entry of
/// `HANDLER_FUNCTION_SUBSTRINGS`.
fn matches_handler_function(name: &str) -> bool {
    let lowered = name.to_lowercase();
    HANDLER_FUNCTION_SUBSTRINGS
        .iter()
        .copied()
        .any(|marker| lowered.contains(marker))
}

/// Build a collapsed [`Prediction`]: the verdict is fixed to `label`
/// and carries exactly one reason and one resolution signal.
fn collapse(
    label: BranchLabel,
    api: HttpApi,
    has_user_input: bool,
    resolution: ResolutionSignal,
    reason: PredictionReason,
) -> Prediction {
    let reasons = vec![reason];
    let resolutions = vec![resolution];
    build_prediction(label, api, has_user_input, reasons, resolutions)
}

/// Assemble a [`Prediction`] for the `predicted` label.
///
/// The alternative branch is the opposite label, populated with that
/// label's severity, title, description and suggested fix.
fn build_prediction(
    predicted: BranchLabel,
    api: HttpApi,
    has_user_input: bool,
    reasons: Vec<PredictionReason>,
    resolutions: Vec<ResolutionSignal>,
) -> Prediction {
    let callee = api.callee_label();
    let other = predicted.opposite();
    let predicted_severity = severity_for_branch(predicted, has_user_input);

    Prediction {
        predicted,
        alternative_branch: AlternativeBranch {
            label: other,
            severity: severity_for_branch(other, has_user_input),
            title: title_for_branch(other, callee),
            description: description_for_branch(other, callee),
            suggested_fix: suggested_fix_for_branch(other, callee),
        },
        predicted_severity,
        reasons,
        resolutions,
    }
}

/// D3 severity mapping. Benign is always `Info`; RealBug is `Critical`
/// when user input flows to the URL and `High` otherwise (mirrors old
/// `ssrf.rs:178-205`).
fn severity_for_branch(label: BranchLabel, has_user_input: bool) -> Severity {
    match (label, has_user_input) {
        (BranchLabel::Benign, _) => Severity::Info,
        (BranchLabel::RealBug, true) => Severity::Critical,
        (BranchLabel::RealBug, false) => Severity::High,
    }
}

/// Finding title for the given branch, naming the HTTP client family.
fn title_for_branch(label: BranchLabel, api_label: &str) -> String {
    match label {
        BranchLabel::Benign => {
            format!("HTTP call via {api_label} appears safely gated (informational)")
        }
        BranchLabel::RealBug => {
            format!("Potential SSRF vulnerability in {api_label} call")
        }
    }
}

/// Long-form finding description for the given branch, naming the HTTP
/// client family.
fn description_for_branch(label: BranchLabel, api_label: &str) -> String {
    match label {
        BranchLabel::Benign => format!(
            "The `{api_label}` HTTP call appears to be either safe-by-\
             construction (Advocate) or explicitly gated (allowlist \
             validator / private-IP guard / scheme allowlist). The \
             call site is carried as Info; the RealBug interpretation \
             is preserved in `alternative_branch` in case the predictor \
             is wrong."
        ),
        BranchLabel::RealBug => format!(
            "The `{api_label}` HTTP call appears to be operating without \
             a safe-URL gate. SSRF vulnerabilities allow attackers to \
             reach internal services (`http://internal-admin/`), exfil \
             cloud metadata endpoints (`http://169.254.169.254/`), \
             port-scan internal networks, and bypass IP-based access \
             controls."
        ),
    }
}

/// Builds the remediation hint attached to a branch.
///
/// - `RealBug`: recommends Advocate, or an explicit host allowlist /
///   private-IP check placed before the request.
/// - `Benign`: recommends the `ssrf-safe` annotation to settle the
///   finding as Info.
///
/// Always returns `Some`. `_api_label` is currently unused; the
/// parameter is kept so the signature matches the sibling
/// `*_for_branch` helpers.
fn suggested_fix_for_branch(label: BranchLabel, _api_label: &str) -> Option<String> {
    match label {
        // The guard examples deliberately use `if ...: raise` rather than
        // `assert`: Python emits no code for `assert` when optimization
        // is requested (`-O`), so an assert-based gate can silently vanish.
        BranchLabel::RealBug => Some(
            "Use Advocate (the SSRF-safe HTTP client for Python) or add \
             an explicit allowlist check before the request. Example: \
             `from advocate import Session; Session().get(url)`. \
             Alternative: `parsed = urlparse(url)`, then \
             `if parsed.hostname not in ALLOWED_HOSTS: raise ValueError('host not allowed')` \
             before `requests.get(url)`. Or: \
             `if ipaddress.ip_address(host).is_private: raise ValueError('private address')` \
             before `requests.get(url)`. Use an explicit `raise`, not `assert`: \
             Python drops `assert` statements under `-O`."
                .to_string(),
        ),
        BranchLabel::Benign => Some(
            "If this is intentional safe usage, annotate \
             `# repotoire: ssrf-safe[<reason>]` to collapse the finding \
             to Info definitively."
                .to_string(),
        ),
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Annotation lookup helpers (called by evidence extraction)
// ─────────────────────────────────────────────────────────────────────────────

/// Extracts the reason from a `# repotoire: ssrf-safe[<reason>]`
/// annotation on `line`.
///
/// Returns `None` when `parse_python_comment` finds no annotation or
/// the annotation's kind is not `ssrf-safe`. Otherwise returns the
/// first annotation argument, or `"unspecified"` when the annotation
/// carries no arguments.
pub(super) fn extract_ssrf_safe_reason(line: &str) -> Option<String> {
    let annotation = parse_python_comment(line).filter(|a| a.kind == "ssrf-safe")?;
    let reason = match annotation.args.first() {
        Some(arg) => arg.clone(),
        None => "unspecified".to_string(),
    };
    Some(reason)
}

/// Extracts the source from a `# repotoire: ssrf-vulnerable[<source>]`
/// annotation on `line`.
///
/// Returns `None` when `parse_python_comment` finds no annotation or
/// the annotation's kind is not `ssrf-vulnerable`. Otherwise returns
/// the first annotation argument, or `"unspecified"` when the
/// annotation carries no arguments.
pub(super) fn extract_ssrf_vulnerable_source(line: &str) -> Option<String> {
    match parse_python_comment(line) {
        Some(ann) if ann.kind == "ssrf-vulnerable" => Some(
            ann.args
                .first()
                .cloned()
                .unwrap_or_else(|| "unspecified".to_string()),
        ),
        _ => None,
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    // Unit tests for the SSRF dual-branch predictor. In order, they cover:
    // the worked examples from the decisions doc, the two collapsing
    // annotations, the empty-evidence tiebreak, pins on the weight sign
    // convention, the line-matching lexicon helpers, the annotation
    // extract helpers, and the `HttpApi` helper methods.
    use super::*;

    // ─── Worked example 1 (decisions D1): canonical RealBug. ───
    #[test]
    fn requests_get_with_user_input_in_handler_predicts_realbug_critical() {
        let evidence = Evidence {
            api: Some(HttpApi::Requests),
            has_user_input_flow: true,
            enclosing_function: Some("proxy_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
        assert_eq!(p.alternative_branch.label, BranchLabel::Benign);
        assert_eq!(p.alternative_branch.severity, Severity::Info);
        // Sum every emitted reason weight and compare against the expected
        // constants with a tolerance, since f32 addition is inexact.
        let total: f32 = p.reasons.iter().map(|r| r.weight).sum();
        assert!(
            (total - (W_USER_INPUT_FLOW + W_ENCLOSING_HANDLER)).abs() < 1e-6,
            "expected -0.80, got {total}"
        );
    }

    // ─── Worked example 2 (decisions D1 amendment): Advocate collapse. ───
    #[test]
    fn advocate_canonical_usage_predicts_benign() {
        // Advocate call → Step 1.5 collapse → Benign with the single
        // W_API_ADVOCATE_CALL-weighted reason. The `import_advocate`
        // signal is not added by the collapse path (collapsing returns
        // immediately) but that's fine: the call-site signal is
        // decisive.
        let evidence = Evidence {
            api: Some(HttpApi::Advocate),
            import_advocate: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        // Exact `==` on a float: this relies on the reason carrying the
        // constant itself rather than a computed value — presumably what
        // `predict` does on the collapse path; confirm there.
        assert!(
            p.reasons.iter().any(|r| r.weight == W_API_ADVOCATE_CALL),
            "must emit the advocate-call reason"
        );
    }

    // ─── Worked example 2b: Advocate with user input stays Benign. ───
    #[test]
    fn advocate_call_with_user_input_stays_benign() {
        // Without the call-site collapse: +0.30 (import) - 0.50 (input)
        // - 0.30 (handler) = -0.50 → RealBug. WRONG — Advocate is
        // safe-by-construction regardless of input. With the collapse:
        // Step 1.5 returns Benign immediately.
        let evidence = Evidence {
            api: Some(HttpApi::Advocate),
            import_advocate: true,
            has_user_input_flow: true,
            enclosing_function: Some("proxy_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(
            p.predicted,
            BranchLabel::Benign,
            "advocate call stays Benign even with user input + handler"
        );
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    // ─── Worked example 3 (decisions D1): allowlist additive but
    // still RealBug if input + handler present. Documented v0
    // conservative default. ───
    #[test]
    fn allowlist_call_with_user_input_in_handler_stays_realbug() {
        // is_safe_url is called upstream but user input + handler
        // dominate: +0.40 - 0.50 - 0.30 = -0.40 → RealBug.
        let evidence = Evidence {
            api: Some(HttpApi::Requests),
            has_allowlist_call: true,
            has_user_input_flow: true,
            enclosing_function: Some("proxy_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        let total: f32 = p.reasons.iter().map(|r| r.weight).sum();
        assert!(
            (total - (W_ALLOWLIST_CALL + W_USER_INPUT_FLOW + W_ENCLOSING_HANDLER)).abs() < 1e-6,
            "expected -0.40, got {total}"
        );
    }

    // ─── Worked example 4: private-IP guard tips the balance Benign
    // when combined with allowlist (no handler/input). ───
    #[test]
    fn allowlist_plus_private_ip_guard_predicts_benign() {
        let evidence = Evidence {
            api: Some(HttpApi::Requests),
            has_allowlist_call: true,
            has_private_ip_guard: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        let total: f32 = p.reasons.iter().map(|r| r.weight).sum();
        assert!(
            (total - (W_ALLOWLIST_CALL + W_PRIVATE_IP_GUARD)).abs() < 1e-6,
            "expected +0.70, got {total}"
        );
    }

    // ─── Worked example 5 (decisions D5 #1): unused advocate import. ───
    //
    // `import advocate` + naked requests.get with no other signals.
    // +0.30 = Benign tiebreak-close. Documented v0 false-positive.
    #[test]
    fn unused_advocate_import_with_naked_requests_predicts_benign() {
        let evidence = Evidence {
            api: Some(HttpApi::Requests),
            import_advocate: true,
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
    }

    // ─── Worked example 6 (decisions D1 honest-review #1): the
    // unused-advocate case STILL flips RealBug when any negative
    // signal fires. Calibration justification for the +0.30 weight. ───
    #[test]
    fn unused_advocate_in_handler_with_user_input_correctly_flips_realbug() {
        let evidence = Evidence {
            api: Some(HttpApi::Requests),
            import_advocate: true,
            has_user_input_flow: true,
            enclosing_function: Some("proxy_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // +0.30 + -0.50 + -0.30 = -0.50
        // NOTE(review): the expected total is a hard-coded literal here,
        // whereas the earlier worked examples sum the W_* constants; this
        // assertion must be updated by hand if a weight is retuned.
        let total: f32 = p.reasons.iter().map(|r| r.weight).sum();
        assert!((total - -0.50).abs() < 1e-6, "expected -0.50, got {total}");
    }

    // ─── F-string concat with user input ───
    #[test]
    fn fstring_with_user_input_predicts_realbug_critical() {
        let evidence = Evidence {
            api: Some(HttpApi::Requests),
            has_user_input_flow: true,
            url_fstring_or_concat: true,
            enclosing_function: Some("proxy_handler".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        assert_eq!(p.predicted_severity, Severity::Critical);
        let total: f32 = p.reasons.iter().map(|r| r.weight).sum();
        // -0.50 + -0.30 + -0.20 = -1.00
        // NOTE(review): hard-coded literal, same caveat as worked example 6.
        assert!((total - -1.00).abs() < 1e-6, "expected -1.00, got {total}");
    }

    // ─── Collapsing annotations ───
    #[test]
    fn ssrf_safe_annotation_collapses_to_benign() {
        let evidence = Evidence {
            // Other signals say RealBug, but the annotation overrides.
            api: Some(HttpApi::Requests),
            has_user_input_flow: true,
            enclosing_function: Some("proxy_handler".to_string()),
            ssrf_safe_annotation: Some("validated-by-cdn".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::Benign);
        assert_eq!(p.predicted_severity, Severity::Info);
        // Exactly one resolution signal is expected, and it must be the
        // source-annotation kind.
        assert_eq!(p.resolutions.len(), 1);
        assert!(matches!(
            p.resolutions[0].kind,
            ResolutionKind::SourceAnnotation { .. }
        ));
    }

    #[test]
    fn ssrf_vulnerable_annotation_collapses_to_realbug() {
        let evidence = Evidence {
            // Other signals say Benign, but the annotation overrides.
            api: Some(HttpApi::Advocate),
            import_advocate: true,
            ssrf_vulnerable_annotation: Some("audited-untrusted".to_string()),
            ..Default::default()
        };
        let p = predict(&evidence);
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // No user input → severity High.
        assert_eq!(p.predicted_severity, Severity::High);
    }

    // ─── Tiebreak ───
    #[test]
    fn empty_evidence_tiebreaks_realbug() {
        // NOTE(review): uses `Evidence::empty()` while every other test
        // builds on `Default::default()`; presumably the two are
        // equivalent — confirm against the `Evidence` definition.
        let p = predict(&Evidence::empty());
        assert_eq!(p.predicted, BranchLabel::RealBug);
        // No user input → severity High.
        assert_eq!(p.predicted_severity, Severity::High);
    }

    // ─── Sign convention ───
    // The asserts below only involve the W_* weights, which clippy treats
    // as constant expressions (hence the `allow`). They are kept on
    // purpose as a pin on the sign convention: negative leans RealBug.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn realbug_signal_weights_are_negative() {
        assert!(W_USER_INPUT_FLOW < 0.0);
        assert!(W_ENCLOSING_HANDLER < 0.0);
        assert!(W_URL_FSTRING_CONCAT < 0.0);
    }

    // Counterpart pin: positive weights lean Benign. The clippy `allow`
    // is needed for the same constant-assertion reason as above.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn benign_signal_weights_are_positive() {
        assert!(W_IMPORT_ADVOCATE > 0.0);
        assert!(W_IMPORT_DEFUSEDURL > 0.0);
        assert!(W_IMPORT_VALIDATORS > 0.0);
        assert!(W_ALLOWLIST_CALL > 0.0);
        assert!(W_SCHEME_HOSTNAME_ALLOWLIST > 0.0);
        assert!(W_PRIVATE_IP_GUARD > 0.0);
        assert!(W_ENCLOSING_TEST_FUNCTION > 0.0);
        assert!(W_API_ADVOCATE_CALL > 0.0);
    }

    // ─── Honest review note pin: Advocate weight calibration. ───
    // Constant-only asserts again, so the clippy lint is silenced here too.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn advocate_weight_is_overcome_by_user_input_and_handler() {
        // The unused-import baseline (+0.30) must be flipped by
        // user_input + handler (-0.80).
        let unused_import_baseline = W_IMPORT_ADVOCATE;
        let max_negatives = W_USER_INPUT_FLOW + W_ENCLOSING_HANDLER;
        assert!(unused_import_baseline > 0.0);
        assert!(unused_import_baseline + max_negatives < 0.0);
    }

    // ─── Lexicon checks ───
    // Each lexicon test feeds representative source lines (or function
    // names) to one `matches_*` helper, with at least one negative case.
    #[test]
    fn user_input_lexicon() {
        assert!(matches_user_input("url = req.body.url"));
        assert!(matches_user_input("u = request.json['url']"));
        assert!(matches_user_input("target = request.args.get('u')"));
        assert!(matches_user_input("p = ctx.params['target']"));
        // Environment-sourced values must not count as user input.
        assert!(!matches_user_input("u = os.environ['API_URL']"));
    }

    #[test]
    fn allowlist_call_lexicon() {
        // Substring + open paren = call. Plain substring = not a call.
        assert!(matches_allowlist_call("if is_safe_url(url):"));
        assert!(matches_allowlist_call("validators.url(url, public=False)"));
        assert!(matches_allowlist_call("validate_url(url)"));
        // Just a comment / docstring mention — not a call.
        assert!(!matches_allowlist_call(
            "# call is_safe_url before requesting"
        ));
        // Variable name only:
        assert!(!matches_allowlist_call("is_safe_url_flag = True"));
    }

    #[test]
    fn scheme_hostname_allowlist_lexicon() {
        assert!(matches_scheme_hostname_allowlist(
            "if parsed.hostname in ALLOWED_HOSTS:"
        ));
        assert!(matches_scheme_hostname_allowlist(
            "if parsed.scheme in {'http', 'https'}:"
        ));
        assert!(matches_scheme_hostname_allowlist(
            "allowed_hosts = ['x.com']"
        ));
        // Merely reading the hostname is not an allowlist check.
        assert!(!matches_scheme_hostname_allowlist(
            "hostname = parsed.hostname"
        ));
    }

    #[test]
    fn private_ip_guard_lexicon() {
        assert!(matches_private_ip_guard(
            "if ipaddress.ip_address(host).is_private:"
        ));
        assert!(matches_private_ip_guard(
            "if ipaddress.ip_address(h).is_loopback:"
        ));
        assert!(matches_private_ip_guard("ip = ip_address(host)"));
        assert!(!matches_private_ip_guard("host = 'example.com'"));
    }

    #[test]
    fn handler_lexicon() {
        assert!(matches_handler_function("proxy_handler"));
        assert!(matches_handler_function("fetch_route"));
        assert!(matches_handler_function("import_data"));
        assert!(matches_handler_function("download_endpoint"));
        assert!(matches_handler_function("webhook_callback"));
        assert!(!matches_handler_function("calculate_total"));
    }

    #[test]
    fn test_function_lexicon() {
        assert!(matches_test_function("test_ssrf_handler"));
        assert!(matches_test_function("ssrf_test"));
        assert!(matches_test_function("setup_fixture"));
        assert!(!matches_test_function("fetch_data"));
    }

    // ─── Extract helpers ───
    #[test]
    fn extract_ssrf_safe_with_reason() {
        assert_eq!(
            extract_ssrf_safe_reason("requests.get(url)  # repotoire: ssrf-safe[validated-by-cdn]"),
            Some("validated-by-cdn".to_string())
        );
    }

    #[test]
    fn extract_ssrf_safe_without_reason() {
        // No bracketed argument → the helper falls back to "unspecified".
        assert_eq!(
            extract_ssrf_safe_reason("requests.get(url)  # repotoire: ssrf-safe"),
            Some("unspecified".to_string())
        );
    }

    #[test]
    fn extract_ssrf_vulnerable_with_source() {
        assert_eq!(
            extract_ssrf_vulnerable_source(
                "advocate_session.get(url)  # repotoire: ssrf-vulnerable[audited]"
            ),
            Some("audited".to_string())
        );
    }

    #[test]
    fn extract_ssrf_safe_ignores_other_kinds() {
        // Annotations of other kinds must not be read as ssrf-safe.
        assert_eq!(
            extract_ssrf_safe_reason("subprocess.run(...)  # repotoire: command-static[ok]"),
            None
        );
        assert_eq!(
            extract_ssrf_safe_reason("ET.parse(blob)  # repotoire: xxe-safe[ok]"),
            None
        );
    }

    #[test]
    fn extract_ssrf_vulnerable_ignores_other_kinds() {
        // Annotations of other kinds must not be read as ssrf-vulnerable.
        assert_eq!(
            extract_ssrf_vulnerable_source(
                "subprocess.run(...)  # repotoire: command-user-controlled[GET]"
            ),
            None
        );
        assert_eq!(
            extract_ssrf_vulnerable_source("ET.parse(blob)  # repotoire: xxe-vulnerable[audited]"),
            None
        );
    }

    // ─── HttpApi helpers ───
    #[test]
    fn http_api_is_python_includes_recognized_libs() {
        assert!(HttpApi::Advocate.is_python());
        assert!(HttpApi::Requests.is_python());
        assert!(HttpApi::Urllib.is_python());
        assert!(HttpApi::Httpx.is_python());
        assert!(HttpApi::Aiohttp.is_python());
        assert!(!HttpApi::Unknown.is_python());
    }

    #[test]
    fn http_api_callee_label_is_stable() {
        // Pins the exact label text returned for each `HttpApi` variant.
        assert_eq!(HttpApi::Advocate.callee_label(), "advocate");
        assert_eq!(HttpApi::Requests.callee_label(), "requests");
        assert_eq!(HttpApi::Urllib.callee_label(), "urllib");
        assert_eq!(HttpApi::Httpx.callee_label(), "httpx");
        assert_eq!(HttpApi::Aiohttp.callee_label(), "aiohttp");
        assert_eq!(HttpApi::Unknown.callee_label(), "HTTP client");
    }
}