// trusty-review 0.4.1

//! Unit tests for `pipeline::verify` (Phase 2, #583, #726).
//!
//! Why: split from `verify.rs` to keep that file under the 500-line cap.
//! What: covers candidate selection, outcome application, verdict re-derivation
//! (paths a/b/c), end-to-end rounds, and truncation regression (#726).
//! Test: this is the test module; each function is a self-contained unit test.

use std::sync::Arc;

use async_trait::async_trait;

use super::*;
use crate::{
    config::constants::VERIFY_REFUTED_CONFIDENCE,
    llm::{LlmError, LlmProvider, LlmRequest, LlmResponse},
    models::{Effort, Finding, Verdict, VerifyOutcome},
};

// ── Deterministic fake verifier providers ─────────────────────────────────────

/// Test double that carries one pre-baked judgment payload as its response text.
struct FixedVerifier {
    text: String,
}

impl FixedVerifier {
    /// Wraps `text` as the canned response body.
    fn with_text(text: &str) -> Self {
        Self {
            text: text.to_owned(),
        }
    }

    /// Fake whose payload is a CONFIRMED judgment.
    fn confirmed() -> Self {
        Self::with_text(r#"{"judgment":"CONFIRMED","reason":"present in diff"}"#)
    }

    /// Fake whose payload is a REFUTED judgment.
    fn refuted() -> Self {
        Self::with_text(r#"{"judgment":"REFUTED","reason":"not in diff"}"#)
    }
}

#[async_trait]
impl LlmProvider for FixedVerifier {
    /// Static label identifying this fake provider.
    fn name(&self) -> &str {
        "fixed-verifier"
    }

    /// Always succeeds, returning the canned text tagged with the requested model.
    async fn complete(&self, req: LlmRequest) -> Result<LlmResponse, LlmError> {
        let text = self.text.clone();
        let model = req.model.clone();
        let canned = LlmResponse {
            text,
            model,
            input_tokens: 10,
            output_tokens: 5,
            latency_ms: 1,
            cost_usd: 0.0,
            finish_reason: None,
        };
        Ok(canned)
    }
}

/// A verifier whose response text is always the cut-off JSON fragment `{"judg`,
/// simulating output that was truncated mid-JSON (see its `LlmProvider` impl).
struct TruncatedVerifier;

#[async_trait]
impl LlmProvider for TruncatedVerifier {
    /// Static label identifying this fake provider.
    fn name(&self) -> &str {
        "truncated-verifier"
    }

    /// Always succeeds, but with a body cut off mid-JSON (as seen with
    /// max_tokens=16).
    async fn complete(&self, req: LlmRequest) -> Result<LlmResponse, LlmError> {
        let text = r#"{"judg"#.to_string();
        let model = req.model.clone();
        let clipped = LlmResponse {
            text,
            model,
            input_tokens: 10,
            output_tokens: 3,
            latency_ms: 1,
            cost_usd: 0.0,
            finish_reason: None,
        };
        Ok(clipped)
    }
}

/// A verifier that always fails with a configurable `LlmError`.
///
/// The error is produced fresh on every call, so a single instance can serve
/// any number of `complete` invocations.
struct FailingVerifier {
    /// Factory invoked by `complete` to build the error it returns.
    make_err: fn() -> LlmError,
}

#[async_trait]
impl LlmProvider for FailingVerifier {
    /// Static label identifying this fake provider.
    fn name(&self) -> &str {
        "failing-verifier"
    }

    /// Never succeeds: the request is ignored and the configured error is returned.
    async fn complete(&self, _req: LlmRequest) -> Result<LlmResponse, LlmError> {
        let build_error = self.make_err;
        Err(build_error())
    }
}

/// Builds a `src/a.rs` "logic" finding at line 10 with the requested effort and
/// confidence; all other text fields are fixed placeholders.
fn finding(effort: Effort, confidence: f32) -> Finding {
    let mut sample = Finding::new("src/a.rs", "logic", "a bug", "fix it", confidence, effort);
    sample.line = Some(10);
    sample
}

/// Shared-pointer handle to a `FixedVerifier` carrying the CONFIRMED payload.
fn confirmed_provider() -> Arc<dyn LlmProvider> {
    let fake = FixedVerifier::confirmed();
    Arc::new(fake)
}
/// Shared-pointer handle to a `FixedVerifier` carrying the REFUTED payload.
fn refuted_provider() -> Arc<dyn LlmProvider> {
    let fake = FixedVerifier::refuted();
    Arc::new(fake)
}
/// Shared-pointer handle to a `TruncatedVerifier`.
fn truncated_provider() -> Arc<dyn LlmProvider> {
    let fake = TruncatedVerifier;
    Arc::new(fake)
}

// ── Candidate selection ───────────────────────────────────────────────────────

#[test]
fn select_candidates_block_uses_wide_net() {
    // A BLOCK verdict sweeps in every finding whose confidence is at least 0.50.
    let well_above = finding(Effort::High, 0.95); // selected
    let just_above = finding(Effort::Medium, 0.55); // selected (>= 0.50)
    let below_floor = finding(Effort::Low, 0.30); // skipped (< 0.50)
    let pool = vec![well_above, just_above, below_floor];

    let picked = select_candidates(Verdict::Block, &pool);
    assert_eq!(
        picked,
        vec![0, 1],
        "block verdict casts a wide net down to 0.50"
    );
}

#[test]
fn select_candidates_request_changes_uses_wide_net() {
    // Boundary check: exactly 0.50 is in, 0.49 is out.
    let at_floor = finding(Effort::Medium, 0.50);
    let under_floor = finding(Effort::Low, 0.49);
    let pool = vec![at_floor, under_floor];

    let picked = select_candidates(Verdict::RequestChanges, &pool);
    assert_eq!(picked, vec![0], "0.50 is included; 0.49 is excluded");
}

#[test]
fn select_candidates_approve_uses_block_tier_only() {
    // Under either APPROVE flavour, only findings at blocking-tier confidence
    // (>= 0.90) are selected for verification.
    let pool = vec![
        finding(Effort::High, 0.92),   // selected (>= 0.90)
        finding(Effort::Medium, 0.80), // skipped
        finding(Effort::Medium, 0.55), // skipped
    ];

    let with_reservations = select_candidates(Verdict::ApproveWithReservations, &pool);
    assert_eq!(
        with_reservations,
        vec![0],
        "approve verdict only verifies block-tier findings"
    );

    let plain = select_candidates(Verdict::Approve, &pool);
    assert_eq!(plain, vec![0], "plain APPROVE behaves the same");
}

#[test]
fn select_candidates_unknown_is_empty() {
    // Even a near-certain High finding yields no candidates under UNKNOWN.
    let findings = Vec::from([finding(Effort::High, 0.99)]);
    assert!(select_candidates(Verdict::Unknown, &findings).is_empty());
}

// ── Outcome application ───────────────────────────────────────────────────────

#[test]
fn apply_outcome_confirmed_keeps_confidence() {
    let mut f = finding(Effort::High, 0.95);
    apply_outcome(&mut f, VerifyOutcome::Confirmed);

    // Confidence must be untouched (compared within float epsilon).
    let drift = (f.confidence - 0.95).abs();
    assert!(drift < f32::EPSILON, "CONFIRMED keeps confidence");

    // The outcome is recorded on the finding.
    assert!(matches!(f.verified, Some(VerifyOutcome::Confirmed)));
}

#[test]
fn apply_outcome_refuted_demotes_below_advisory() {
    let mut f = finding(Effort::High, 0.95);
    apply_outcome(&mut f, VerifyOutcome::Refuted);

    // Confidence is rewritten to the shared refuted constant.
    let gap = (f.confidence - VERIFY_REFUTED_CONFIDENCE).abs();
    assert!(
        gap < f32::EPSILON,
        "REFUTED demotes confidence below the advisory tier"
    );

    // The outcome is recorded on the finding.
    assert!(matches!(f.verified, Some(VerifyOutcome::Refuted)));
}

#[test]
fn apply_outcome_error_refuted_also_demotes() {
    // An infrastructure-failure outcome demotes confidence just like REFUTED.
    let infra_failure = VerifyOutcome::ErrorRefuted {
        error_class: "ModelNotFound".to_string(),
    };
    let mut f = finding(Effort::High, 0.95);
    apply_outcome(&mut f, infra_failure);

    assert!((f.confidence - VERIFY_REFUTED_CONFIDENCE).abs() < f32::EPSILON);
    assert!(matches!(
        f.verified,
        Some(VerifyOutcome::ErrorRefuted { .. })
    ));
}

// ── Verdict re-derivation (refuted exclusion) ─────────────────────────────────

#[test]
fn rederive_excludes_refuted_relaxes() {
    // Path (b): the lone High finding is cleanly REFUTED and nothing is
    // confirmed, so it is excluded and the neutral baseline gives APPROVE.
    let mut refuted = finding(Effort::High, 0.95);
    apply_outcome(&mut refuted, VerifyOutcome::Refuted);
    let pool = [refuted];

    // any_clean_refuted=true is what selects path (b): drop to the APPROVE baseline.
    let rederived = rederive_verdict(Verdict::Block, false, true, &pool);
    assert_eq!(
        rederived,
        Verdict::Approve,
        "a cleanly-refuted candidate set must relax BLOCK to APPROVE (path b)"
    );
}

#[test]
fn rederive_keeps_confirmed_block() {
    // Path (a): the lone High finding is confirmed, survives, and keeps the
    // BLOCK floor in place.
    let mut confirmed = finding(Effort::High, 0.95);
    apply_outcome(&mut confirmed, VerifyOutcome::Confirmed);
    let pool = [confirmed];

    let rederived = rederive_verdict(Verdict::Block, true, false, &pool);
    assert_eq!(
        rederived,
        Verdict::Block,
        "a confirmed High finding must keep the BLOCK floor (path a)"
    );
}

#[test]
fn rederive_confirmed_medium_caps_at_approve_star() {
    // Path (a2) — #1015: when only a Medium finding is confirmed, the baseline
    // is capped at APPROVE*; it must not anchor REQUEST_CHANGES that came from
    // a floor-driven escalation.
    let mut confirmed_medium = finding(Effort::Medium, 0.85);
    apply_outcome(&mut confirmed_medium, VerifyOutcome::Confirmed);
    let pool = [confirmed_medium];

    let rederived = rederive_verdict(Verdict::RequestChanges, true, false, &pool);
    assert_eq!(
        rederived,
        Verdict::ApproveWithReservations,
        "confirmed Medium caps at APPROVE* (path a2 — #1015)"
    );
}

#[test]
fn rederive_confirmed_praise_keeps_clean_approve() {
    use crate::pipeline::letter_grade::{Grade, clamp_grade_to_verdict};

    // Path (a2) — #1343 runtime residual. Scenario: the model emitted a clean
    // APPROVE (grade A-) and the verifier CONFIRMS a confidence=1.0 low-effort
    // `praise` finding. Confirming a non-High finding must NOT lift the
    // baseline to APPROVE*; the source-of-truth APPROVE review_body wins.
    // Pre-fix, path (a2) hard-coded the baseline to APPROVE*, which produced
    // APPROVE* and clamped A- down to C+. Post-fix, severity-min(APPROVE,
    // APPROVE*) = APPROVE, so both the verdict and the A- grade are kept.
    let mut kudos = finding(Effort::Low, 1.0);
    apply_outcome(&mut kudos, VerifyOutcome::Confirmed);
    let pool = [kudos];

    let rederived = rederive_verdict(Verdict::Approve, true, false, &pool);
    assert_eq!(
        rederived,
        Verdict::Approve,
        "a confirmed low-effort praise finding must NOT harden a clean APPROVE to \
         APPROVE* (path a2 — #1343 runtime residual)"
    );

    // Grade side of the same regression: clamping A- against APPROVE is a
    // no-op, whereas clamping against APPROVE* would have dropped it to C+.
    let grade_after_clamp = clamp_grade_to_verdict(Grade::AMinus, &rederived);
    assert_eq!(
        grade_after_clamp,
        Grade::AMinus,
        "grade must stay A- when verdict stays APPROVE (#1343 runtime residual)"
    );
}

#[test]
fn rederive_confirmed_high_effort_still_escalates_from_approve() {
    // Safety-net guard for the #1343 fix: a CONFIRMED High-effort (critical)
    // finding must still escalate even though the model said APPROVE. Path (a)
    // keeps primary_verdict and derive_verdict's BLOCK floor does the escalation.
    let mut critical = finding(Effort::High, 0.95);
    apply_outcome(&mut critical, VerifyOutcome::Confirmed);
    let pool = [critical];

    let rederived = rederive_verdict(Verdict::Approve, true, false, &pool);
    assert_eq!(
        rederived,
        Verdict::Block,
        "a confirmed High-effort finding still escalates APPROVE to BLOCK (path a)"
    );
}

#[test]
fn rederive_mixed_keeps_only_surviving_floor() {
    // Path (a2): the High finding is REFUTED (and therefore excluded) while a
    // Medium@0.85 is CONFIRMED; the model's primary verdict was APPROVE*.
    // Only the surviving Medium may set the floor, so the result is APPROVE*.
    let mut high = finding(Effort::High, 0.95);
    apply_outcome(&mut high, VerifyOutcome::Refuted);
    let mut med = finding(Effort::Medium, 0.85);
    apply_outcome(&mut med, VerifyOutcome::Confirmed);

    // Both flags are set: something was confirmed and something was cleanly refuted.
    let verdict = rederive_verdict(Verdict::ApproveWithReservations, true, true, &[high, med]);
    assert_eq!(
        verdict,
        Verdict::ApproveWithReservations,
        // The label is (a2), not (a): the confirmed survivor is Medium, matching
        // the other confirmed-non-High tests in this module.
        "surviving single Medium floors to APPROVE*; refuted High is excluded (path a2)"
    );
}

#[test]
fn rederive_error_refuted_preserves_primary_verdict() {
    // Path (c): every demotion is an ErrorRefuted (infrastructure failure), so
    // the primary verdict must be preserved.
    let infra_failure = VerifyOutcome::ErrorRefuted {
        error_class: "ModelNotFound".to_string(),
    };
    let mut unverifiable = finding(Effort::High, 0.95);
    apply_outcome(&mut unverifiable, infra_failure);
    let pool = [unverifiable];

    let rederived = rederive_verdict(Verdict::Block, false, false, &pool);
    assert_eq!(
        rederived,
        Verdict::Block,
        "all-ErrorRefuted must preserve primary_verdict (path c)"
    );
}

#[test]
fn rederive_truncation_refuted_preserves_primary_verdict() {
    // Path (c): every demotion is a TruncationRefuted, so the primary verdict
    // must be preserved (#726).
    let mut clipped = finding(Effort::High, 0.85);
    apply_outcome(&mut clipped, VerifyOutcome::TruncationRefuted);
    let pool = [clipped];

    let rederived = rederive_verdict(Verdict::Block, false, false, &pool);
    assert_eq!(
        rederived,
        Verdict::Block,
        "all-TruncationRefuted must preserve primary_verdict (path c)"
    );
}

// ── End-to-end verification round ─────────────────────────────────────────────

#[tokio::test]
async fn verify_confirmed_keeps_and_block_holds() {
    // One High-effort, high-confidence finding that the verifier CONFIRMS:
    // its confidence is left alone and BLOCK remains the verdict.
    let mut findings = vec![finding(Effort::High, 0.95)];
    let final_verdict = run_verification_round(
        &confirmed_provider(),
        "us.anthropic.claude-haiku-4-5",
        "+ some diff",
        Verdict::Block,
        &mut findings,
        None,
        None,
    )
    .await;

    assert_eq!(
        final_verdict,
        Verdict::Block,
        "confirmed High finding must hold BLOCK"
    );
    assert!(matches!(
        findings[0].verified,
        Some(VerifyOutcome::Confirmed)
    ));
    assert!((findings[0].confidence - 0.95).abs() < f32::EPSILON);
}

#[tokio::test]
async fn verify_refuted_demotes_and_block_relaxes() {
    // The sole blocking finding gets REFUTED, so it is demoted and
    // derive_verdict relaxes BLOCK all the way to APPROVE (nothing substantive
    // is left).
    let mut findings = vec![finding(Effort::High, 0.95)];
    let final_verdict = run_verification_round(
        &refuted_provider(),
        "us.anthropic.claude-haiku-4-5",
        "+ some diff",
        Verdict::Block,
        &mut findings,
        None,
        None,
    )
    .await;

    assert_eq!(
        final_verdict,
        Verdict::Approve,
        "refuting the only blocking finding must relax BLOCK to APPROVE"
    );
    assert!(matches!(findings[0].verified, Some(VerifyOutcome::Refuted)));

    // The finding stays in the list; only its confidence changes.
    let gap = (findings[0].confidence - VERIFY_REFUTED_CONFIDENCE).abs();
    assert!(
        gap < f32::EPSILON,
        "refuted finding is demoted, not dropped"
    );
}

#[tokio::test]
async fn verify_no_candidates_is_noop() {
    // With an APPROVE verdict and only sub-block-tier findings there are no
    // candidates: findings stay as they were and the verdict re-derives
    // unchanged. The provider would refute if asked, but it is never called.
    let mut findings = vec![finding(Effort::Low, 0.40)];
    let final_verdict = run_verification_round(
        &refuted_provider(),
        "m",
        "diff",
        Verdict::Approve,
        &mut findings,
        None,
        None,
    )
    .await;

    assert_eq!(final_verdict, Verdict::Approve);

    let untouched = findings[0].verified.is_none();
    assert!(untouched, "no candidate must stay unverified");
    assert!((findings[0].confidence - 0.40).abs() < f32::EPSILON);
}

#[tokio::test]
async fn verify_unknown_is_passthrough() {
    // An UNKNOWN primary verdict must come back as-is, with no finding verified.
    let mut pool = vec![finding(Effort::High, 0.95)];
    let final_verdict = run_verification_round(
        &refuted_provider(),
        "m",
        "diff",
        Verdict::Unknown,
        &mut pool,
        None,
        None,
    )
    .await;

    assert_eq!(
        final_verdict,
        Verdict::Unknown,
        "UNKNOWN passes through untouched"
    );

    let still_unverified = pool[0].verified.is_none();
    assert!(still_unverified, "UNKNOWN must not verify");
}

#[tokio::test]
async fn verify_model_unavailable_marks_error_refuted_and_preserves_verdict() {
    // ModelNotFound → ErrorRefuted (path c) → primary_verdict preserved (#726).
    let verifier: Arc<dyn LlmProvider> = Arc::new(FailingVerifier {
        make_err: || LlmError::ModelNotFound("stale-verifier".to_string()),
    });
    let mut findings = vec![finding(Effort::High, 0.95)];
    let verdict = run_verification_round(
        &verifier,
        "stale-verifier",
        "+ diff",
        Verdict::Block,
        &mut findings,
        None,
        None,
    )
    .await;
    assert!(matches!(
        findings[0].verified,
        Some(VerifyOutcome::ErrorRefuted { .. })
    ));
    assert_eq!(
        verdict,
        Verdict::Block,
        "ErrorRefuted-only round must preserve primary verdict"
    );
}

// ── Truncation path (#726 regression) ─────────────────────────────────────────

#[tokio::test]
async fn verify_truncated_response_is_truncation_refuted() {
    // A verifier reply that is truncated/unparseable must be recorded as
    // TruncationRefuted and demote the finding's confidence. The returned
    // verdict is not inspected here.
    let verifier = truncated_provider();
    let mut findings = vec![finding(Effort::High, 0.95)];
    run_verification_round(
        &verifier,
        "m",
        "+ diff",
        Verdict::Block,
        &mut findings,
        None,
        None,
    )
    .await;

    assert!(matches!(
        findings[0].verified,
        Some(VerifyOutcome::TruncationRefuted)
    ));
    assert!((findings[0].confidence - VERIFY_REFUTED_CONFIDENCE).abs() < f32::EPSILON);
}

#[tokio::test]
async fn verify_truncation_preserves_primary_verdict() {
    // When every candidate ends up TruncationRefuted (path c) the primary
    // verdict must be returned unchanged — the #726 root cause.
    let verifier = truncated_provider();
    let mut pool = vec![finding(Effort::High, 0.95)];
    let final_verdict = run_verification_round(
        &verifier,
        "m",
        "+ diff",
        Verdict::Block,
        &mut pool,
        None,
        None,
    )
    .await;

    assert_eq!(
        final_verdict,
        Verdict::Block,
        "truncation-only round must preserve primary verdict (path c)"
    );
}

/// Regression for the dropped-JoinHandle true-positive (PR #720, #726 incident).
/// Why: (a) a CONFIRMED Medium must yield APPROVE* (path a2, #1015 — it was
/// REQUEST_CHANGES before #1015); (b) a TruncationRefuted round must NOT
/// collapse to APPROVE (path c, #726).
/// Test: this test itself.
#[tokio::test]
async fn verify_join_handle_regression_pr720() {
    let model = "us.anthropic.claude-sonnet-4-6";
    let diff = concat!(
        "+pub fn spawn_warm_boot_task() {\n",
        "+    tokio::spawn(async move { warm_boot().await });\n",
        "+}\n",
    );

    let mut leak = Finding::new(
        "crates/trusty-search/src/startup.rs",
        "resource-leak",
        "JoinHandle dropped; spawned task detached, risking pool exhaustion",
        "Store the JoinHandle and await it in graceful shutdown",
        0.85,
        Effort::Medium,
    );
    leak.line = Some(47);

    // Sub-test (a): verifier CONFIRMS the Medium finding → path (a2), whose
    // baseline is APPROVE*, so the verdict ends at APPROVE*.
    let confirming = confirmed_provider();
    let mut findings_1 = vec![leak.clone()];
    let confirmed_verdict = run_verification_round(
        &confirming,
        model,
        diff,
        Verdict::RequestChanges,
        &mut findings_1,
        None,
        None,
    )
    .await;
    assert!(matches!(
        findings_1[0].verified,
        Some(VerifyOutcome::Confirmed)
    ));
    // Since #1015 a confirmed Medium caps at APPROVE* (path a2) rather than
    // REQUEST_CHANGES.
    assert_eq!(
        confirmed_verdict,
        Verdict::ApproveWithReservations,
        "CONFIRMED Medium → APPROVE* (path a2 — #1015)"
    );

    // Sub-test (b): verifier output is truncated → TruncationRefuted, and the
    // primary verdict is kept (path c — #726).
    let truncating = truncated_provider();
    let mut findings_2 = vec![leak];
    let truncated_verdict = run_verification_round(
        &truncating,
        model,
        diff,
        Verdict::RequestChanges,
        &mut findings_2,
        None,
        None,
    )
    .await;
    assert!(matches!(
        findings_2[0].verified,
        Some(VerifyOutcome::TruncationRefuted)
    ));
    assert_eq!(
        truncated_verdict,
        Verdict::RequestChanges,
        "truncation must NOT collapse to APPROVE (path c — #726)"
    );
}

// ── #1015 regression ──────────────────────────────────────────────────────────

/// Regression (#1015): an APPROVE verdict accompanied by two Medium@0.70
/// findings must remain APPROVE.
/// Advisory Mediums (≤ 0.80) are left out of the floor count, so nothing
/// escalates the verdict.
#[tokio::test]
async fn verify_approve_two_advisory_medium_stays_approve() {
    let first_advisory = finding(Effort::Medium, 0.70);
    let second_advisory = finding(Effort::Medium, 0.70);
    let mut pool = vec![first_advisory, second_advisory];

    let final_verdict = run_verification_round(
        &confirmed_provider(),
        "m",
        "+ advisory diff",
        Verdict::Approve,
        &mut pool,
        None,
        None,
    )
    .await;

    assert_eq!(
        final_verdict,
        Verdict::Approve,
        "advisory Medium@0.70 must not escalate APPROVE to REQUEST_CHANGES (#1015)"
    );
}

// ── Verify-path schema deserialization (#1235 strict-mode regression guard) ────
//
// Symmetric to the review path's `parse_direct_json_strict_full_shape`
// (`parser_tests.rs`). The #1235 strict-mode fix makes `reason` a REQUIRED
// property on the OpenAI verify schema; `#[serde(default)]` on
// `VerifyJudgment::reason` is what keeps lenient providers (Bedrock / Anthropic /
// Gemini) that OMIT `reason` deserializing instead of silently failing the
// verify path. These tests pin that invariant so a future edit that drops the
// `#[serde(default)]` fails loudly.

/// A verify response carrying both `judgment` and `reason` deserializes and parses.
///
/// Why: pins the happy path for strict providers — a response with every
/// required field must round-trip into `VerifyJudgment` and map to the right
/// decision.
/// What: serializes `{"judgment":"CONFIRMED","reason":...}`, deserializes it,
/// checks both typed fields, then checks `parse_judgment` agrees it is CONFIRMED.
/// Test: this is the test.
#[test]
fn verify_judgment_full_shape_deserializes() {
    let reason_text = "the finding is present in the diff at the cited line";
    let payload = serde_json::json!({
        "judgment": "CONFIRMED",
        "reason": reason_text,
    })
    .to_string();

    // Typed deserialization: both fields must come through verbatim.
    let decoded: VerifyJudgment =
        serde_json::from_str(&payload).expect("full-shape verify response must deserialize");
    assert_eq!(decoded.judgment, "CONFIRMED");
    assert_eq!(decoded.reason, reason_text);

    // Same payload through the public parser entry point.
    let decision = parse_judgment(&payload);
    assert_eq!(decision, Some(true));
}

/// A verify response with no `reason` field still deserializes (guards `#[serde(default)]`).
///
/// Why: lenient providers (Bedrock / Anthropic / Gemini) ignore the strict
/// schema and may leave `reason` out. If `#[serde(default)]` were removed from
/// `VerifyJudgment::reason`, deserialization would fail and the verify path
/// would break silently — the #1235 regression this PR guards against.
/// What: deserializes `{"judgment":"REFUTED"}` (no `reason`), asserts `reason`
/// falls back to the empty string, and that `parse_judgment` still maps REFUTED.
/// Test: this is the test.
#[test]
fn verify_judgment_omits_reason_still_deserializes() {
    let payload = serde_json::json!({ "judgment": "REFUTED" }).to_string();

    // Typed deserialization must succeed despite the missing field.
    let decoded: VerifyJudgment = serde_json::from_str(&payload)
        .expect("verify response omitting `reason` must still deserialize (#[serde(default)])");
    assert_eq!(decoded.judgment, "REFUTED");
    assert_eq!(
        decoded.reason, "",
        "omitted `reason` must default to empty string"
    );

    // A judgment without a reason still produces a clean decision end-to-end.
    let decision = parse_judgment(&payload);
    assert_eq!(decision, Some(false));
}

// Liveness gate decision logic is tested in `verify_liveness.rs::tests`
// (`liveness_alive_allows_start`, `liveness_model_unavailable_refuses`, etc.)
// to keep this file under the 500-line cap and respect module ownership.