car_multi/patterns/
adversarial_review.rs1use crate::error::MultiError;
12use crate::mailbox::Mailbox;
13use crate::runner::AgentRunner;
14use crate::shared::SharedInfra;
15use crate::types::{AgentOutput, AgentSpec};
16use serde::{Deserialize, Serialize};
17use std::sync::Arc;
18use tracing::instrument;
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct ReviewFinding {
23 pub criterion: String,
24 pub passed: bool,
25 pub evidence: String,
26 pub severity: String, }
28
29#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct AdversarialReviewResult {
32 pub spec: String,
34 pub passed: bool,
36 pub findings: Vec<ReviewFinding>,
38 pub reviewer_output: AgentOutput,
40 pub blocker_count: usize,
42}
43
44pub struct AdversarialReview {
46 pub reviewer: AgentSpec,
48 pub criteria: Vec<String>,
50 pub fail_on_blockers: bool,
52}
53
54impl AdversarialReview {
55 pub fn new(reviewer: AgentSpec, criteria: Vec<String>) -> Self {
56 Self {
57 reviewer,
58 criteria,
59 fail_on_blockers: true,
60 }
61 }
62
63 #[instrument(name = "multi.adversarial_review", skip_all)]
68 pub async fn run(
69 &self,
70 work_output: &str,
71 runner: &Arc<dyn AgentRunner>,
72 infra: &SharedInfra,
73 ) -> Result<AdversarialReviewResult, MultiError> {
74 let criteria_text = self.criteria.iter()
75 .enumerate()
76 .map(|(i, c)| format!("{}. {}", i + 1, c))
77 .collect::<Vec<_>>()
78 .join("\n");
79
80 let review_task = format!(
81 r#"You are an adversarial reviewer. Your job is to find problems.
82
83## Acceptance Criteria
84{criteria}
85
86## Work Output to Review
87{work}
88
89## Instructions
90Evaluate the work output against EACH acceptance criterion above.
91For each criterion, determine if it PASSES or FAILS. Provide specific evidence
92(file paths, line numbers, code snippets, or direct quotes from the output).
93
94Be strict. If a criterion is ambiguous, assume it should be fully met.
95Flag anything suspicious as a "blocker" or "major" finding.
96
97Respond with a JSON object:
98```json
99{{
100 "passed": true/false,
101 "findings": [
102 {{
103 "criterion": "criterion text",
104 "passed": true/false,
105 "evidence": "specific evidence with file:line references",
106 "severity": "blocker|major|minor|info"
107 }}
108 ]
109}}
110```"#,
111 criteria = criteria_text,
112 work = work_output,
113 );
114
115 let mailbox = Mailbox::default();
116 let rt = infra.make_runtime();
117 let output = runner
118 .run(&self.reviewer, &review_task, &rt, &mailbox)
119 .await
120 .map_err(|e| MultiError::AgentFailed(
121 self.reviewer.name.clone(),
122 format!("adversarial review failed: {}", e),
123 ))?;
124
125 let findings = Self::parse_findings(&output.answer);
127 let blocker_count = findings.iter().filter(|f| f.severity == "blocker").count();
128 let passed = if self.fail_on_blockers {
129 blocker_count == 0 && findings.iter().all(|f| f.passed || f.severity != "major")
130 } else {
131 findings.iter().filter(|f| f.passed).count() > findings.len() / 2
132 };
133
134 Ok(AdversarialReviewResult {
135 spec: criteria_text,
136 passed,
137 findings,
138 reviewer_output: output,
139 blocker_count,
140 })
141 }
142
143 fn parse_findings(response: &str) -> Vec<ReviewFinding> {
144 if let Some(json_str) = car_ir::json_extract::extract_json_object(response) {
146 if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&json_str) {
147 if let Some(findings) = parsed.get("findings").and_then(|f| f.as_array()) {
148 return findings.iter().filter_map(|f| {
149 Some(ReviewFinding {
150 criterion: f.get("criterion")?.as_str()?.to_string(),
151 passed: f.get("passed")?.as_bool()?,
152 evidence: f.get("evidence")?.as_str()?.to_string(),
153 severity: f.get("severity").and_then(|s| s.as_str())
154 .unwrap_or("major").to_string(),
155 })
156 }).collect();
157 }
158 }
159 }
160 vec![ReviewFinding {
162 criterion: "overall".to_string(),
163 passed: response.to_lowercase().contains("pass"),
164 evidence: response.to_string(),
165 severity: "major".to_string(),
166 }]
167 }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// A fenced JSON answer yields one finding per entry, in order.
    #[test]
    fn parse_findings_from_json() {
        let reply = r#"```json
{
  "passed": false,
  "findings": [
    {"criterion": "all tests pass", "passed": true, "evidence": "cargo test: 50 passed", "severity": "info"},
    {"criterion": "no hardcoded secrets", "passed": false, "evidence": "src/config.rs:42 contains API key", "severity": "blocker"}
  ]
}
```"#;
        let parsed = AdversarialReview::parse_findings(reply);
        assert_eq!(parsed.len(), 2);

        let (tests_finding, secrets_finding) = (&parsed[0], &parsed[1]);
        assert!(tests_finding.passed);
        assert!(!secrets_finding.passed);
        assert_eq!(secrets_finding.severity, "blocker");
    }

    /// A plain-text answer collapses into a single finding.
    #[test]
    fn parse_findings_fallback() {
        let reply = "This looks good overall. PASS.";
        let parsed = AdversarialReview::parse_findings(reply);
        assert_eq!(parsed.len(), 1);
        assert!(parsed[0].passed);
    }
}
199}