car-multi 0.27.0

//! AdversarialReview — fresh agent reviews work against a spec.
//!
//! Key property: the reviewer gets NO prior context from the author.
//! It receives only the work output and the acceptance criteria, then
//! evaluates pass/fail with evidence (file:line references).
//!
//! Inspired by metaswarm's 4-phase execution loop where adversarial
//! reviewers are ALWAYS fresh Task() instances — never teammates,
//! never resumed, never given prior context.

use crate::error::MultiError;
use crate::mailbox::Mailbox;
use crate::runner::AgentRunner;
use crate::shared::SharedInfra;
use crate::types::{AgentOutput, AgentSpec};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::instrument;

/// Severity label carried by the synthetic finding that is emitted when the
/// reviewer's response cannot be parsed into a verdict.
///
/// It is deliberately distinct from the real pass/fail severities so that
/// consumers can treat such a result as "not adversarially verified" rather
/// than approve or reject on it. (car#359)
pub const SEVERITY_INCONCLUSIVE: &str = "inconclusive";

/// Upper bound, in bytes, on how much of an unparseable reviewer response is
/// copied into the inconclusive finding's evidence.
///
/// The bound applies to the raw-response excerpt only; the fixed explanatory
/// prefix and the truncation marker are added on top of it. The raw response
/// was previously dumped verbatim, producing multi-KB findings (car#359).
const INCONCLUSIVE_EVIDENCE_CAP: usize = 1024;

/// A single review criterion with pass/fail and evidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewFinding {
    /// Text of the criterion this finding refers to, or `"overall"` for the
    /// synthetic whole-review findings produced when the reviewer gave no
    /// usable per-criterion entries.
    pub criterion: String,
    /// Whether the criterion was judged to be met.
    pub passed: bool,
    /// Supporting evidence for the verdict (ideally file:line references).
    pub evidence: String,
    /// Severity label: "blocker", "major", "minor", "info" or "inconclusive".
    pub severity: String,
}

/// Result of an adversarial review.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdversarialReviewResult {
    /// The task/spec being reviewed against: the acceptance criteria rendered
    /// as a numbered list, one criterion per line.
    pub spec: String,
    /// Overall pass/fail. Computed from `findings` (and forced to `false`
    /// when `inconclusive` is set), not copied from the reviewer's own
    /// top-level verdict.
    pub passed: bool,
    /// Per-criterion findings.
    pub findings: Vec<ReviewFinding>,
    /// The reviewer's raw output.
    pub reviewer_output: AgentOutput,
    /// Number of findings whose severity is "blocker", whether or not the
    /// finding itself is marked as passed.
    pub blocker_count: usize,
    /// True when the reviewer's response could not be parsed into a
    /// verdict. In this state `passed` is forced to `false` (fail-closed)
    /// and `findings` holds a single [`SEVERITY_INCONCLUSIVE`] finding.
    /// Consumers should treat an inconclusive result as "not
    /// adversarially verified" — surface it, don't silently approve or
    /// reject on it. (car#359)
    pub inconclusive: bool,
}

/// Configuration for adversarial review.
pub struct AdversarialReview {
    /// The reviewer agent spec. Must be a different agent from the author;
    /// that is an obligation on the caller — nothing in this type checks it.
    pub reviewer: AgentSpec,
    /// The acceptance criteria / spec to review against. Each entry becomes
    /// one numbered line in the prompt handed to the reviewer.
    pub criteria: Vec<String>,
    /// Whether blockers auto-fail the review.
    ///
    /// When `true`, the review passes only if no finding is labelled
    /// "blocker" and no failed finding is labelled "major". When `false`,
    /// the review passes if strictly more than half of the findings passed.
    pub fail_on_blockers: bool,
}

impl AdversarialReview {
    /// Build a review configuration for `reviewer` against `criteria`.
    ///
    /// `fail_on_blockers` starts out `true`; assign the public field to opt
    /// into majority voting instead.
    pub fn new(reviewer: AgentSpec, criteria: Vec<String>) -> Self {
        Self {
            reviewer,
            criteria,
            fail_on_blockers: true,
        }
    }

    /// Run an adversarial review of the given work output.
    ///
    /// The reviewer is always a fresh agent with NO context from the author.
    /// It only sees: the work output and the acceptance criteria.
    ///
    /// # Verdict
    ///
    /// * Reviewer response could not be parsed: `passed` is `false` and
    ///   `inconclusive` is `true` (fail-closed, car#359).
    /// * `fail_on_blockers == true`: passes only when no finding is labelled
    ///   "blocker" (whether or not that finding passed) and no *failed*
    ///   finding is labelled "major".
    /// * `fail_on_blockers == false`: passes when strictly more than half of
    ///   the findings passed.
    ///
    /// # Errors
    ///
    /// * [`MultiError::BudgetExhausted`] when `infra.begin_agent()` refuses
    ///   to admit the reviewer.
    /// * [`MultiError::AgentFailed`] when the runner itself returns an error.
    #[instrument(name = "multi.adversarial_review", skip_all)]
    pub async fn run(
        &self,
        work_output: &str,
        runner: &Arc<dyn AgentRunner>,
        infra: &SharedInfra,
    ) -> Result<AdversarialReviewResult, MultiError> {
        // Numbered list ("1. …", "2. …"), reused verbatim as the result's `spec`.
        let criteria_text = self
            .criteria
            .iter()
            .enumerate()
            .map(|(i, c)| format!("{}. {}", i + 1, c))
            .collect::<Vec<_>>()
            .join("\n");

        let review_task = format!(
            r#"You are an adversarial reviewer. Your job is to find problems.

## Acceptance Criteria
{criteria}

## Work Output to Review
{work}

## Instructions
Evaluate the work output against EACH acceptance criterion above.
For each criterion, determine if it PASSES or FAILS. Provide specific evidence
(file paths, line numbers, code snippets, or direct quotes from the output).

Be strict. If a criterion is ambiguous, assume it should be fully met.
Flag anything suspicious as a "blocker" or "major" finding.

Respond with a JSON object:
```json
{{
  "passed": true/false,
  "findings": [
    {{
      "criterion": "criterion text",
      "passed": true/false,
      "evidence": "specific evidence with file:line references",
      "severity": "blocker|major|minor|info"
    }}
  ]
}}
```"#,
            criteria = criteria_text,
            work = work_output,
        );

        // Budget gate the reviewer. The review is a single agent and its whole
        // purpose; if the budget can't afford it, surface that rather than
        // returning a misleading empty pass/fail.
        infra
            .begin_agent()
            .map_err(|e| MultiError::BudgetExhausted(e.to_string()))?;

        // A default mailbox and a newly made runtime: the reviewer is handed
        // nothing but the prompt built above.
        let mailbox = Mailbox::default();
        let rt = infra.make_runtime();
        let output = runner
            .run(&self.reviewer, &review_task, &rt, &mailbox)
            .await
            .map_err(|e| {
                MultiError::AgentFailed(
                    self.reviewer.name.clone(),
                    format!("adversarial review failed: {}", e),
                )
            })?;
        infra.record_output(&output);

        // Severities arrive canonicalised (trimmed, lower-case) from
        // `parse_findings`, so the exact comparisons below are reliable.
        let ParsedReview {
            findings,
            inconclusive,
        } = Self::parse_findings(&output.answer);
        let blocker_count = findings.iter().filter(|f| f.severity == "blocker").count();
        let passed = if inconclusive {
            // The review couldn't be parsed into a verdict. Don't guess —
            // an unverified review is not a pass. (car#359)
            false
        } else if self.fail_on_blockers {
            blocker_count == 0 && findings.iter().all(|f| f.passed || f.severity != "major")
        } else {
            // Strict majority of findings must have passed.
            findings.iter().filter(|f| f.passed).count() > findings.len() / 2
        };

        Ok(AdversarialReviewResult {
            spec: criteria_text,
            passed,
            findings,
            reviewer_output: output,
            blocker_count,
            inconclusive,
        })
    }

    /// Parse a reviewer response into findings.
    ///
    /// Resolution order:
    ///
    /// 1. A JSON object with at least one well-formed entry in `findings`:
    ///    those entries are returned. Entries that lack `criterion`, `passed`
    ///    or `evidence`, or carry them with the wrong JSON type, are skipped.
    /// 2. Otherwise, a JSON object with a boolean top-level `passed`: a single
    ///    synthetic "overall" finding carrying that verdict.
    /// 3. Otherwise: `inconclusive: true` with a single failed
    ///    [`SEVERITY_INCONCLUSIVE`] finding.
    ///
    /// The previous behavior of guessing pass/fail from the substring
    /// `"pass"` fails open (review prompts and model chatter routinely
    /// contain "passes"), so an unparseable review is reported as
    /// inconclusive instead, for the caller to treat as unverified. (car#359)
    ///
    /// Severity labels are trimmed and ASCII-lowercased here. `run` gates on
    /// exact matches against "blocker" and "major", so without this a failed
    /// finding labelled "Blocker" or " MAJOR " would slip through the gate.
    fn parse_findings(response: &str) -> ParsedReview {
        // Try to extract JSON
        if let Some(json_str) = car_ir::json_extract::extract_json_object(response) {
            if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&json_str) {
                let findings: Vec<ReviewFinding> = parsed
                    .get("findings")
                    .and_then(|f| f.as_array())
                    .map(|arr| arr.iter().filter_map(Self::finding_from_json).collect())
                    .unwrap_or_default();
                if !findings.is_empty() {
                    return ParsedReview {
                        findings,
                        inconclusive: false,
                    };
                }
                // No usable per-criterion findings, but an explicit
                // top-level `passed` boolean is still a conclusive verdict —
                // honor it (a reviewer that finds nothing wrong legitimately
                // returns `{"passed": true, "findings": []}`) rather than
                // discarding the one piece of structure the prompt asked for.
                if let Some(passed) = parsed.get("passed").and_then(|p| p.as_bool()) {
                    return Self::overall_verdict(passed);
                }
                // Parseable JSON but neither well-formed findings nor a
                // top-level verdict — as unverified as no JSON at all.
            }
        }
        Self::inconclusive_review(response)
    }

    /// Convert one element of the reviewer's `findings` array into a
    /// [`ReviewFinding`], or `None` when a required field is missing or has
    /// the wrong JSON type. A missing or non-string `severity` defaults to
    /// "major".
    fn finding_from_json(entry: &serde_json::Value) -> Option<ReviewFinding> {
        Some(ReviewFinding {
            criterion: entry.get("criterion")?.as_str()?.to_string(),
            passed: entry.get("passed")?.as_bool()?,
            evidence: entry.get("evidence")?.as_str()?.to_string(),
            severity: entry
                .get("severity")
                .and_then(|s| s.as_str())
                .map(Self::normalize_severity)
                .unwrap_or_else(|| "major".to_string()),
        })
    }

    /// Canonicalise a reviewer-supplied severity label: surrounding
    /// whitespace removed and ASCII letters lower-cased. A blank label is
    /// treated like a missing one and becomes "major".
    fn normalize_severity(raw: &str) -> String {
        let label = raw.trim();
        if label.is_empty() {
            "major".to_string()
        } else {
            label.to_ascii_lowercase()
        }
    }

    /// Conclusive review built from the reviewer's top-level `passed` flag
    /// alone, used when no per-criterion finding was usable.
    fn overall_verdict(passed: bool) -> ParsedReview {
        let (evidence, severity) = if passed {
            (
                "reviewer reported a clean pass with no per-criterion findings",
                "info",
            )
        } else {
            (
                "reviewer reported failure with no per-criterion findings",
                "major",
            )
        };
        ParsedReview {
            findings: vec![ReviewFinding {
                criterion: "overall".to_string(),
                passed,
                evidence: evidence.to_string(),
                severity: severity.to_string(),
            }],
            inconclusive: false,
        }
    }

    /// Fallback for a response that carries no extractable verdict: a single
    /// failed [`SEVERITY_INCONCLUSIVE`] finding (fail-closed) whose evidence
    /// holds a bounded slice of the raw response for diagnosis, never the
    /// whole thing.
    fn inconclusive_review(response: &str) -> ParsedReview {
        let mut evidence = String::from(
            "reviewer response could not be parsed into a verdict; treat as not verified. raw: ",
        );
        let raw = response.trim();
        if raw.len() > INCONCLUSIVE_EVIDENCE_CAP {
            // Slice on a char boundary to avoid panicking mid-codepoint.
            let mut end = INCONCLUSIVE_EVIDENCE_CAP;
            while end > 0 && !raw.is_char_boundary(end) {
                end -= 1;
            }
            evidence.push_str(&raw[..end]);
            evidence.push_str("… [truncated]");
        } else {
            evidence.push_str(raw);
        }
        ParsedReview {
            findings: vec![ReviewFinding {
                criterion: "overall".to_string(),
                passed: false,
                evidence,
                severity: SEVERITY_INCONCLUSIVE.to_string(),
            }],
            inconclusive: true,
        }
    }
}

/// Outcome of [`AdversarialReview::parse_findings`]: the extracted
/// findings plus whether the response was unparseable (car#359).
struct ParsedReview {
    /// Findings recovered from the reviewer's response. When `inconclusive`
    /// is set this holds exactly one synthetic, failed
    /// [`SEVERITY_INCONCLUSIVE`] finding.
    findings: Vec<ReviewFinding>,
    /// True when no structured verdict could be extracted from the response.
    inconclusive: bool,
}

#[cfg(test)]
mod tests {
    use super::*;
    use car_engine::Runtime;

    /// Shorthand for invoking the parser under test.
    fn parse(raw: &str) -> ParsedReview {
        AdversarialReview::parse_findings(raw)
    }

    /// Evidence must be prefix + capped slice + suffix — never the full
    /// multi-KB dump — and must carry the truncation marker.
    fn assert_bounded_and_truncated(evidence: &str) {
        assert!(evidence.len() < INCONCLUSIVE_EVIDENCE_CAP + 128);
        assert!(evidence.ends_with("… [truncated]"));
    }

    #[test]
    fn parse_findings_from_json() {
        // A fenced JSON verdict with one passing and one blocking finding.
        let fenced = r#"```json
{
  "passed": false,
  "findings": [
    {"criterion": "all tests pass", "passed": true, "evidence": "cargo test: 50 passed", "severity": "info"},
    {"criterion": "no hardcoded secrets", "passed": false, "evidence": "src/config.rs:42 contains API key", "severity": "blocker"}
  ]
}
```"#;
        let review = parse(fenced);
        assert!(!review.inconclusive);
        assert_eq!(review.findings.len(), 2);

        let (first, second) = (&review.findings[0], &review.findings[1]);
        assert!(first.passed);
        assert!(!second.passed);
        assert_eq!(second.severity, "blocker");
    }

    #[test]
    fn unparseable_response_is_inconclusive_not_a_pass() {
        // Free-form chatter that merely mentions "passes" used to be read as
        // a pass by the old substring heuristic; it must not be any more.
        let review = parse("Sure! This work clearly passes criterion 1 and looks great.");
        assert!(review.inconclusive, "no JSON verdict => inconclusive");
        assert_eq!(review.findings.len(), 1);

        let only = &review.findings[0];
        assert!(!only.passed, "must fail closed, not open");
        assert_eq!(only.severity, SEVERITY_INCONCLUSIVE);
    }

    #[test]
    fn inconclusive_evidence_is_bounded() {
        let oversized = "x".repeat(INCONCLUSIVE_EVIDENCE_CAP * 4);
        let review = parse(&oversized);
        assert!(review.inconclusive);
        assert_bounded_and_truncated(&review.findings[0].evidence);
    }

    #[test]
    fn inconclusive_evidence_truncation_is_utf8_safe() {
        // Every char here is 3 bytes and the cap (1024) is not a multiple of
        // 3, so the cap offset falls inside a codepoint. The parser has to
        // walk back to a char boundary; slicing at the raw offset would
        // panic. The result is valid UTF-8 by construction (`String`).
        let multibyte = "中".repeat(INCONCLUSIVE_EVIDENCE_CAP);
        let review = parse(&multibyte);
        assert!(review.inconclusive);
        assert_bounded_and_truncated(&review.findings[0].evidence);
    }

    #[test]
    fn empty_findings_with_explicit_pass_is_a_conclusive_pass() {
        // "Nothing wrong" is a legitimate reviewer answer and must not be
        // downgraded to inconclusive (car#359 review follow-up).
        let review = parse(r#"{"passed": true, "findings": []}"#);
        assert!(!review.inconclusive, "explicit top-level verdict is conclusive");
        assert_eq!(review.findings.len(), 1);

        let overall = &review.findings[0];
        assert!(overall.passed);
        assert_eq!(overall.severity, "info");
    }

    #[test]
    fn empty_findings_with_explicit_fail_is_a_conclusive_fail() {
        let review = parse(r#"{"passed": false, "findings": []}"#);
        assert!(!review.inconclusive);
        assert!(!review.findings[0].passed);
    }

    #[test]
    fn empty_findings_without_verdict_is_inconclusive() {
        // Neither usable findings nor a top-level boolean: genuinely
        // unverified.
        let review = parse(r#"{"summary": "looks fine", "findings": []}"#);
        assert!(review.inconclusive);
        assert_eq!(review.findings[0].severity, SEVERITY_INCONCLUSIVE);
    }

    #[test]
    fn no_findings_key_and_no_verdict_is_inconclusive() {
        let review = parse(r#"{"note": "I reviewed it"}"#);
        assert!(review.inconclusive);
    }

    /// Stub runner whose answer is always the wrapped string; lets a test
    /// drive `run()` end-to-end and pin the fail-closed behavior at the gate.
    struct FixedReviewer(&'static str);

    #[async_trait::async_trait]
    impl AgentRunner for FixedReviewer {
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            _runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            let canned = AgentOutput {
                name: spec.name.clone(),
                answer: self.0.to_string(),
                turns: 1,
                tool_calls: 0,
                duration_ms: 1.0,
                error: None,
                outcome: None,
                tokens: None,
                tools_used: Vec::new(),
            };
            Ok(canned)
        }
    }

    #[tokio::test]
    async fn run_forces_fail_closed_on_inconclusive() {
        // The canned answer is unstructured praise containing "passes" —
        // exactly what the old substring heuristic turned into passed=true.
        let runner: Arc<dyn AgentRunner> =
            Arc::new(FixedReviewer("Looks great, this passes everything!"));
        let infra = SharedInfra::new();
        let review = AdversarialReview::new(
            AgentSpec::new("reviewer", "review it"),
            vec!["criterion one".to_string()],
        );

        let result = review
            .run("some work output", &runner, &infra)
            .await
            .unwrap();

        assert!(result.inconclusive, "unparseable review must be inconclusive");
        assert!(!result.passed, "inconclusive must fail closed at the gate");
    }
}