use crate::error::MultiError;
use crate::mailbox::Mailbox;
use crate::runner::AgentRunner;
use crate::shared::SharedInfra;
use crate::types::{AgentOutput, AgentSpec};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::instrument;
/// A single verdict produced by the adversarial reviewer.
///
/// Findings are built by `AdversarialReview::parse_findings`, either from the
/// entries of the `findings` array in the reviewer's JSON reply or, when that
/// cannot be used, as one synthetic finding covering the whole reply.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewFinding {
/// Criterion text as reported by the reviewer, or `"overall"` for the
/// synthetic free-text finding.
pub criterion: String,
/// Whether the reviewer judged this criterion to be satisfied.
pub passed: bool,
/// Evidence backing the verdict; the synthetic free-text finding stores the
/// entire reviewer reply here.
pub evidence: String,
/// Severity label. The reviewer prompt asks for one of `blocker`, `major`,
/// `minor` or `info`; `major` is used when the reviewer supplies none.
pub severity: String, }
/// Outcome of one `AdversarialReview::run` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdversarialReviewResult {
/// The acceptance criteria as sent to the reviewer: one criterion per line,
/// each prefixed with its 1-based number.
pub spec: String,
/// Overall verdict computed by `run` from `findings` (not copied from the
/// reviewer's own top-level `passed` field).
pub passed: bool,
/// Findings parsed from the reviewer's answer.
pub findings: Vec<ReviewFinding>,
/// The reviewer agent's output as returned by the runner.
pub reviewer_output: AgentOutput,
/// Number of findings whose severity is `"blocker"`.
pub blocker_count: usize,
}
/// Configuration for an adversarial review pass: a reviewer agent is asked to
/// check a piece of work against a list of acceptance criteria.
pub struct AdversarialReview {
/// Agent spec handed to the runner to perform the review.
pub reviewer: AgentSpec,
/// Acceptance criteria; they are numbered and embedded in the review prompt.
pub criteria: Vec<String>,
/// Selects the scoring rule used by `run`. When `true`, the review fails if
/// any finding is labelled `blocker` or any `major` finding did not pass.
/// When `false`, the review passes when more than half of the findings
/// passed. `new` sets this to `true`.
pub fail_on_blockers: bool,
}
impl AdversarialReview {
    /// Creates a review that asks `reviewer` to judge work against `criteria`.
    ///
    /// `fail_on_blockers` starts out `true`; the field is public and may be
    /// changed after construction.
    pub fn new(reviewer: AgentSpec, criteria: Vec<String>) -> Self {
        Self {
            reviewer,
            criteria,
            fail_on_blockers: true,
        }
    }

    /// Runs the reviewer agent over `work_output` and scores its findings.
    ///
    /// The criteria are rendered as a numbered list, embedded together with
    /// `work_output` in the review prompt, and sent to `runner` using a default
    /// [`Mailbox`] and a runtime obtained from `infra`. The reviewer's answer is
    /// then parsed into [`ReviewFinding`]s and scored:
    ///
    /// * `fail_on_blockers == true`: the review passes only if no finding is
    ///   labelled `blocker` and every finding labelled `major` passed.
    /// * `fail_on_blockers == false`: the review passes if strictly more than
    ///   half of the findings passed.
    ///
    /// # Errors
    ///
    /// Returns [`MultiError::AgentFailed`] carrying the reviewer's name when the
    /// runner reports an error.
    #[instrument(name = "multi.adversarial_review", skip_all)]
    pub async fn run(
        &self,
        work_output: &str,
        runner: &Arc<dyn AgentRunner>,
        infra: &SharedInfra,
    ) -> Result<AdversarialReviewResult, MultiError> {
        let criteria_text = self
            .criteria
            .iter()
            .enumerate()
            .map(|(i, c)| format!("{}. {}", i + 1, c))
            .collect::<Vec<_>>()
            .join("\n");
        // The raw string is the prompt sent verbatim to the reviewer; `{{`/`}}`
        // are literal braces, `{criteria}`/`{work}` are substituted below.
        let review_task = format!(
            r#"You are an adversarial reviewer. Your job is to find problems.
## Acceptance Criteria
{criteria}
## Work Output to Review
{work}
## Instructions
Evaluate the work output against EACH acceptance criterion above.
For each criterion, determine if it PASSES or FAILS. Provide specific evidence
(file paths, line numbers, code snippets, or direct quotes from the output).
Be strict. If a criterion is ambiguous, assume it should be fully met.
Flag anything suspicious as a "blocker" or "major" finding.
Respond with a JSON object:
```json
{{
"passed": true/false,
"findings": [
{{
"criterion": "criterion text",
"passed": true/false,
"evidence": "specific evidence with file:line references",
"severity": "blocker|major|minor|info"
}}
]
}}
```"#,
            criteria = criteria_text,
            work = work_output,
        );
        let mailbox = Mailbox::default();
        let rt = infra.make_runtime();
        let output = runner
            .run(&self.reviewer, &review_task, &rt, &mailbox)
            .await
            .map_err(|e| {
                MultiError::AgentFailed(
                    self.reviewer.name.clone(),
                    format!("adversarial review failed: {}", e),
                )
            })?;
        let findings = Self::parse_findings(&output.answer);
        // Counts every finding labelled "blocker", whether or not it is marked
        // as passed. Severities are lower-cased by `parse_findings`, so the
        // exact-match comparisons below are safe.
        let blocker_count = findings.iter().filter(|f| f.severity == "blocker").count();
        let passed = if self.fail_on_blockers {
            blocker_count == 0 && findings.iter().all(|f| f.passed || f.severity != "major")
        } else {
            // Integer division makes this a strict-majority test.
            findings.iter().filter(|f| f.passed).count() > findings.len() / 2
        };
        Ok(AdversarialReviewResult {
            spec: criteria_text,
            passed,
            findings,
            reviewer_output: output,
            blocker_count,
        })
    }

    /// Turns the reviewer's raw answer into a non-empty list of findings.
    ///
    /// Structured findings from the JSON reply are preferred. When the reply
    /// contains no usable JSON, or its `findings` array yields nothing, a single
    /// `"overall"` finding of severity `major` is returned instead, with the
    /// whole reply as evidence and a verdict from [`Self::fallback_verdict`].
    /// Falling back on an empty list prevents a reply without findings from
    /// passing the gate vacuously.
    fn parse_findings(response: &str) -> Vec<ReviewFinding> {
        match Self::structured_findings(response) {
            Some(findings) if !findings.is_empty() => findings,
            _ => vec![ReviewFinding {
                criterion: "overall".to_string(),
                passed: Self::fallback_verdict(response),
                evidence: response.to_string(),
                severity: "major".to_string(),
            }],
        }
    }

    /// Parses the `findings` array out of the first JSON object in `response`.
    ///
    /// Returns `None` when no JSON object can be extracted or parsed, or when
    /// it has no `findings` array.
    fn structured_findings(response: &str) -> Option<Vec<ReviewFinding>> {
        let json_str = car_ir::json_extract::extract_json_object(response)?;
        let parsed: serde_json::Value = serde_json::from_str(&json_str).ok()?;
        let entries = parsed.get("findings")?.as_array()?;
        Some(entries.iter().filter_map(Self::finding_from_json).collect())
    }

    /// Converts one entry of the `findings` array into a [`ReviewFinding`].
    ///
    /// Non-object entries are skipped. Object entries are always kept so that a
    /// partially filled-in failure cannot silently disappear: missing text
    /// fields get placeholder values and a missing or non-boolean `passed` is
    /// treated as a failure.
    fn finding_from_json(entry: &serde_json::Value) -> Option<ReviewFinding> {
        let fields = entry.as_object()?;
        let criterion = fields
            .get("criterion")
            .and_then(|v| v.as_str())
            .unwrap_or("unspecified");
        let evidence = fields.get("evidence").and_then(|v| v.as_str()).unwrap_or("");
        let passed = fields
            .get("passed")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        let severity = Self::normalize_severity(fields.get("severity").and_then(|v| v.as_str()));
        Some(ReviewFinding {
            criterion: criterion.to_string(),
            passed,
            evidence: evidence.to_string(),
            severity,
        })
    }

    /// Canonicalises a severity label to one of `blocker`, `major`, `minor`,
    /// `info`.
    ///
    /// The label is trimmed and lower-cased so that e.g. `"Blocker"` is still
    /// recognised by the exact-match checks in `run`. Missing or unrecognised
    /// labels become `major`, the same default used for an absent field.
    fn normalize_severity(raw: Option<&str>) -> String {
        let level = raw.map(|s| s.trim().to_lowercase()).unwrap_or_default();
        if matches!(level.as_str(), "blocker" | "major" | "minor" | "info") {
            level
        } else {
            "major".to_string()
        }
    }

    /// Heuristic verdict for a reply that could not be parsed as JSON.
    ///
    /// The reply is lower-cased and split into alphanumeric words. It counts as
    /// a pass only if it contains a pass word (`pass`, `passes`, `passed`,
    /// `passing`) and none of the following:
    ///
    /// * a fail word (`fail`, `failed`, ...) that is not directly preceded by a
    ///   negator such as `no`, `not` or `0`;
    /// * a pass word directly preceded by a negator ("does not pass");
    /// * a pass word directly followed by `false` (`"passed": false` in broken
    ///   JSON).
    ///
    /// Whole-word matching avoids treating words like "password" or "bypass" as
    /// a pass. This is deliberately conservative and only looks at adjacent
    /// words; it is not a general sentiment parser.
    fn fallback_verdict(response: &str) -> bool {
        // "t" covers contractions such as "doesn't", which split into "doesn" + "t".
        const NEGATORS: &[&str] = &["not", "t", "no", "never", "cannot", "without", "zero", "0"];
        const FAIL_WORDS: &[&str] = &["fail", "fails", "failed", "failing", "failure", "failures"];
        const PASS_WORDS: &[&str] = &["pass", "passes", "passed", "passing"];

        let lowered = response.to_lowercase();
        let words: Vec<&str> = lowered
            .split(|c: char| !c.is_alphanumeric())
            .filter(|w| !w.is_empty())
            .collect();

        let mut saw_pass = false;
        let mut prev: Option<&str> = None;
        for (idx, &word) in words.iter().enumerate() {
            let negated = matches!(prev, Some(p) if NEGATORS.contains(&p));
            if FAIL_WORDS.contains(&word) {
                if !negated {
                    return false;
                }
            } else if PASS_WORDS.contains(&word) {
                let denied = words.get(idx + 1).copied() == Some("false");
                if negated || denied {
                    return false;
                }
                saw_pass = true;
            }
            prev = Some(word);
        }
        saw_pass
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fenced JSON reply with two entries is parsed into two findings that
    /// keep their verdicts and severity labels.
    #[test]
    fn parse_findings_from_json() {
        let reviewer_reply = r#"```json
{
"passed": false,
"findings": [
{"criterion": "all tests pass", "passed": true, "evidence": "cargo test: 50 passed", "severity": "info"},
{"criterion": "no hardcoded secrets", "passed": false, "evidence": "src/config.rs:42 contains API key", "severity": "blocker"}
]
}
```"#;
        let parsed = AdversarialReview::parse_findings(reviewer_reply);
        assert_eq!(parsed.len(), 2);

        let (tests_finding, secrets_finding) = (&parsed[0], &parsed[1]);
        assert!(tests_finding.passed);
        assert!(!secrets_finding.passed);
        assert_eq!(secrets_finding.severity, "blocker");
    }

    /// A plain-text reply without JSON collapses into one finding whose verdict
    /// comes from the text itself.
    #[test]
    fn parse_findings_fallback() {
        let reviewer_reply = "This looks good overall. PASS.";
        let parsed = AdversarialReview::parse_findings(reviewer_reply);
        assert_eq!(parsed.len(), 1);
        assert!(parsed[0].passed);
    }
}