use crate::error::MultiError;
use crate::mailbox::Mailbox;
use crate::runner::AgentRunner;
use crate::shared::SharedInfra;
use crate::types::{AgentOutput, AgentSpec};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::instrument;
/// Severity label placed on the synthetic finding that is produced when the
/// reviewer's response cannot be turned into a verdict.
pub const SEVERITY_INCONCLUSIVE: &str = "inconclusive";
/// Upper bound, in bytes, on how much of the raw reviewer response is copied
/// into the evidence of an inconclusive finding.
const INCONCLUSIVE_EVIDENCE_CAP: usize = 1024;
/// A single verdict about one acceptance criterion, either reported by the
/// reviewer or synthesized by the response parser.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewFinding {
/// Criterion text as echoed by the reviewer; synthesized findings use `"overall"`.
pub criterion: String,
/// Whether the criterion was judged as met.
pub passed: bool,
/// Supporting evidence text for the verdict.
pub evidence: String,
/// Severity label. The review prompt asks for `blocker`, `major`, `minor` or
/// `info`; the parser additionally emits [`SEVERITY_INCONCLUSIVE`].
pub severity: String, }
/// Outcome of one [`AdversarialReview::run`] call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdversarialReviewResult {
/// The acceptance criteria rendered as a numbered list, one per line.
pub spec: String,
/// Final gate verdict; always `false` when `inconclusive` is `true`.
pub passed: bool,
/// Findings parsed (or synthesized) from the reviewer's answer.
pub findings: Vec<ReviewFinding>,
/// The reviewer agent's output as returned by the runner.
pub reviewer_output: AgentOutput,
/// Number of findings carrying the `blocker` severity.
pub blocker_count: usize,
/// `true` when the reviewer's answer could not be parsed into a verdict.
pub inconclusive: bool,
}
/// Configuration for reviewing a piece of work output against a list of
/// acceptance criteria using a reviewer agent.
pub struct AdversarialReview {
/// Agent spec handed to the runner as the reviewer.
pub reviewer: AgentSpec,
/// Acceptance criteria; they are numbered from 1 when rendered into the prompt.
pub criteria: Vec<String>,
/// Gate policy used by `run`. When `true` (the value set by `new`), any
/// `blocker` finding or failed `major` finding fails the review; when
/// `false`, the review passes when more than half of the findings passed.
pub fail_on_blockers: bool,
}
impl AdversarialReview {
/// Creates a review that checks work against `criteria` using `reviewer`.
///
/// `fail_on_blockers` starts out as `true`; the field is public, so callers
/// can change the gate policy after construction.
pub fn new(reviewer: AgentSpec, criteria: Vec<String>) -> Self {
    let fail_on_blockers = true;
    Self {
        reviewer,
        criteria,
        fail_on_blockers,
    }
}
#[instrument(name = "multi.adversarial_review", skip_all)]
pub async fn run(
&self,
work_output: &str,
runner: &Arc<dyn AgentRunner>,
infra: &SharedInfra,
) -> Result<AdversarialReviewResult, MultiError> {
let criteria_text = self
.criteria
.iter()
.enumerate()
.map(|(i, c)| format!("{}. {}", i + 1, c))
.collect::<Vec<_>>()
.join("\n");
let review_task = format!(
r#"You are an adversarial reviewer. Your job is to find problems.
## Acceptance Criteria
{criteria}
## Work Output to Review
{work}
## Instructions
Evaluate the work output against EACH acceptance criterion above.
For each criterion, determine if it PASSES or FAILS. Provide specific evidence
(file paths, line numbers, code snippets, or direct quotes from the output).
Be strict. If a criterion is ambiguous, assume it should be fully met.
Flag anything suspicious as a "blocker" or "major" finding.
Respond with a JSON object:
```json
{{
"passed": true/false,
"findings": [
{{
"criterion": "criterion text",
"passed": true/false,
"evidence": "specific evidence with file:line references",
"severity": "blocker|major|minor|info"
}}
]
}}
```"#,
criteria = criteria_text,
work = work_output,
);
infra
.begin_agent()
.map_err(|e| MultiError::BudgetExhausted(e.to_string()))?;
let mailbox = Mailbox::default();
let rt = infra.make_runtime();
let output = runner
.run(&self.reviewer, &review_task, &rt, &mailbox)
.await
.map_err(|e| {
MultiError::AgentFailed(
self.reviewer.name.clone(),
format!("adversarial review failed: {}", e),
)
})?;
infra.record_output(&output);
let parsed = Self::parse_findings(&output.answer);
let findings = parsed.findings;
let blocker_count = findings.iter().filter(|f| f.severity == "blocker").count();
let passed = if parsed.inconclusive {
false
} else if self.fail_on_blockers {
blocker_count == 0 && findings.iter().all(|f| f.passed || f.severity != "major")
} else {
findings.iter().filter(|f| f.passed).count() > findings.len() / 2
};
Ok(AdversarialReviewResult {
spec: criteria_text,
passed,
findings,
reviewer_output: output,
blocker_count,
inconclusive: parsed.inconclusive,
})
}
fn parse_findings(response: &str) -> ParsedReview {
if let Some(json_str) = car_ir::json_extract::extract_json_object(response) {
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&json_str) {
let findings: Vec<ReviewFinding> = parsed
.get("findings")
.and_then(|f| f.as_array())
.map(|arr| {
arr.iter()
.filter_map(|f| {
Some(ReviewFinding {
criterion: f.get("criterion")?.as_str()?.to_string(),
passed: f.get("passed")?.as_bool()?,
evidence: f.get("evidence")?.as_str()?.to_string(),
severity: f
.get("severity")
.and_then(|s| s.as_str())
.unwrap_or("major")
.to_string(),
})
})
.collect()
})
.unwrap_or_default();
if !findings.is_empty() {
return ParsedReview {
findings,
inconclusive: false,
};
}
if let Some(passed) = parsed.get("passed").and_then(|p| p.as_bool()) {
return ParsedReview {
findings: vec![ReviewFinding {
criterion: "overall".to_string(),
passed,
evidence: if passed {
"reviewer reported a clean pass with no per-criterion findings"
} else {
"reviewer reported failure with no per-criterion findings"
}
.to_string(),
severity: if passed { "info" } else { "major" }.to_string(),
}],
inconclusive: false,
};
}
}
}
let mut evidence = String::from(
"reviewer response could not be parsed into a verdict; treat as not verified. raw: ",
);
let raw = response.trim();
if raw.len() > INCONCLUSIVE_EVIDENCE_CAP {
let mut end = INCONCLUSIVE_EVIDENCE_CAP;
while end > 0 && !raw.is_char_boundary(end) {
end -= 1;
}
evidence.push_str(&raw[..end]);
evidence.push_str("… [truncated]");
} else {
evidence.push_str(raw);
}
ParsedReview {
findings: vec![ReviewFinding {
criterion: "overall".to_string(),
passed: false,
evidence,
severity: SEVERITY_INCONCLUSIVE.to_string(),
}],
inconclusive: true,
}
}
}
/// Internal result of `AdversarialReview::parse_findings`.
struct ParsedReview {
/// Findings extracted from the response, or a single synthetic `"overall"` finding.
findings: Vec<ReviewFinding>,
/// `true` when no verdict could be extracted from the response.
inconclusive: bool,
}
#[cfg(test)]
mod tests {
    use super::*;
    use car_engine::Runtime;

    /// Shorthand for invoking the parser under test.
    fn parse(reply: &str) -> ParsedReview {
        AdversarialReview::parse_findings(reply)
    }

    // A fenced JSON answer with two well-formed findings is parsed as-is.
    #[test]
    fn parse_findings_from_json() {
        let reply = r#"```json
{
"passed": false,
"findings": [
{"criterion": "all tests pass", "passed": true, "evidence": "cargo test: 50 passed", "severity": "info"},
{"criterion": "no hardcoded secrets", "passed": false, "evidence": "src/config.rs:42 contains API key", "severity": "blocker"}
]
}
```"#;
        let review = parse(reply);
        assert!(!review.inconclusive);
        assert_eq!(review.findings.len(), 2);
        let (first, second) = (&review.findings[0], &review.findings[1]);
        assert!(first.passed);
        assert!(!second.passed);
        assert_eq!(second.severity, "blocker");
    }

    // Friendly prose without JSON must never be mistaken for a pass.
    #[test]
    fn unparseable_response_is_inconclusive_not_a_pass() {
        let review = parse("Sure! This work clearly passes criterion 1 and looks great.");
        assert!(review.inconclusive, "no JSON verdict => inconclusive");
        assert_eq!(review.findings.len(), 1);
        let only = &review.findings[0];
        assert!(!only.passed, "must fail closed, not open");
        assert_eq!(only.severity, SEVERITY_INCONCLUSIVE);
    }

    // The quoted raw response is capped and marked as truncated.
    #[test]
    fn inconclusive_evidence_is_bounded() {
        let oversized = "x".repeat(INCONCLUSIVE_EVIDENCE_CAP * 4);
        let review = parse(&oversized);
        assert!(review.inconclusive);
        let evidence = &review.findings[0].evidence;
        assert!(evidence.len() < INCONCLUSIVE_EVIDENCE_CAP + 128);
        assert!(evidence.ends_with("… [truncated]"));
    }

    // Truncation must land on a character boundary for multi-byte text.
    #[test]
    fn inconclusive_evidence_truncation_is_utf8_safe() {
        let multibyte = "中".repeat(INCONCLUSIVE_EVIDENCE_CAP);
        let review = parse(&multibyte);
        assert!(review.inconclusive);
        let evidence = &review.findings[0].evidence;
        assert!(evidence.len() < INCONCLUSIVE_EVIDENCE_CAP + 128);
        assert!(evidence.ends_with("… [truncated]"));
    }

    // An explicit top-level pass with no findings is a conclusive pass.
    #[test]
    fn empty_findings_with_explicit_pass_is_a_conclusive_pass() {
        let review = parse(r#"{"passed": true, "findings": []}"#);
        assert!(!review.inconclusive, "explicit top-level verdict is conclusive");
        assert_eq!(review.findings.len(), 1);
        let overall = &review.findings[0];
        assert!(overall.passed);
        assert_eq!(overall.severity, "info");
    }

    // An explicit top-level failure with no findings is a conclusive failure.
    #[test]
    fn empty_findings_with_explicit_fail_is_a_conclusive_fail() {
        let review = parse(r#"{"passed": false, "findings": []}"#);
        assert!(!review.inconclusive);
        assert!(!review.findings[0].passed);
    }

    // Valid JSON without any verdict is still inconclusive.
    #[test]
    fn empty_findings_without_verdict_is_inconclusive() {
        let review = parse(r#"{"summary": "looks fine", "findings": []}"#);
        assert!(review.inconclusive);
        assert_eq!(review.findings[0].severity, SEVERITY_INCONCLUSIVE);
    }

    // Neither a `findings` key nor a `passed` flag: inconclusive.
    #[test]
    fn no_findings_key_and_no_verdict_is_inconclusive() {
        let review = parse(r#"{"note": "I reviewed it"}"#);
        assert!(review.inconclusive);
    }

    /// Runner stub that always answers with the wrapped text.
    struct FixedReviewer(&'static str);

    #[async_trait::async_trait]
    impl AgentRunner for FixedReviewer {
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            _runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            let canned = AgentOutput {
                name: spec.name.clone(),
                answer: self.0.to_string(),
                turns: 1,
                tool_calls: 0,
                duration_ms: 1.0,
                error: None,
                outcome: None,
                tokens: None,
                tools_used: Vec::new(),
            };
            Ok(canned)
        }
    }

    // End to end: an unparseable reviewer answer must fail the gate.
    #[tokio::test]
    async fn run_forces_fail_closed_on_inconclusive() {
        let runner: Arc<dyn AgentRunner> =
            Arc::new(FixedReviewer("Looks great, this passes everything!"));
        let infra = SharedInfra::new();
        let review = AdversarialReview::new(
            AgentSpec::new("reviewer", "review it"),
            vec!["criterion one".to_string()],
        );
        let outcome = review
            .run("some work output", &runner, &infra)
            .await
            .unwrap();
        assert!(outcome.inconclusive, "unparseable review must be inconclusive");
        assert!(!outcome.passed, "inconclusive must fail closed at the gate");
    }
}