car_multi/patterns/
adversarial_review.rs1use crate::error::MultiError;
12use crate::mailbox::Mailbox;
13use crate::runner::AgentRunner;
14use crate::shared::SharedInfra;
15use crate::types::{AgentOutput, AgentSpec};
16use serde::{Deserialize, Serialize};
17use std::sync::Arc;
18use tracing::instrument;
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct ReviewFinding {
23 pub criterion: String,
24 pub passed: bool,
25 pub evidence: String,
26 pub severity: String, }
28
29#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct AdversarialReviewResult {
32 pub spec: String,
34 pub passed: bool,
36 pub findings: Vec<ReviewFinding>,
38 pub reviewer_output: AgentOutput,
40 pub blocker_count: usize,
42}
43
44pub struct AdversarialReview {
46 pub reviewer: AgentSpec,
48 pub criteria: Vec<String>,
50 pub fail_on_blockers: bool,
52}
53
54impl AdversarialReview {
55 pub fn new(reviewer: AgentSpec, criteria: Vec<String>) -> Self {
56 Self {
57 reviewer,
58 criteria,
59 fail_on_blockers: true,
60 }
61 }
62
63 #[instrument(name = "multi.adversarial_review", skip_all)]
68 pub async fn run(
69 &self,
70 work_output: &str,
71 runner: &Arc<dyn AgentRunner>,
72 infra: &SharedInfra,
73 ) -> Result<AdversarialReviewResult, MultiError> {
74 let criteria_text = self
75 .criteria
76 .iter()
77 .enumerate()
78 .map(|(i, c)| format!("{}. {}", i + 1, c))
79 .collect::<Vec<_>>()
80 .join("\n");
81
82 let review_task = format!(
83 r#"You are an adversarial reviewer. Your job is to find problems.
84
85## Acceptance Criteria
86{criteria}
87
88## Work Output to Review
89{work}
90
91## Instructions
92Evaluate the work output against EACH acceptance criterion above.
93For each criterion, determine if it PASSES or FAILS. Provide specific evidence
94(file paths, line numbers, code snippets, or direct quotes from the output).
95
96Be strict. If a criterion is ambiguous, assume it should be fully met.
97Flag anything suspicious as a "blocker" or "major" finding.
98
99Respond with a JSON object:
100```json
101{{
102 "passed": true/false,
103 "findings": [
104 {{
105 "criterion": "criterion text",
106 "passed": true/false,
107 "evidence": "specific evidence with file:line references",
108 "severity": "blocker|major|minor|info"
109 }}
110 ]
111}}
112```"#,
113 criteria = criteria_text,
114 work = work_output,
115 );
116
117 infra
121 .begin_agent()
122 .map_err(|e| MultiError::BudgetExhausted(e.to_string()))?;
123
124 let mailbox = Mailbox::default();
125 let rt = infra.make_runtime();
126 let output = runner
127 .run(&self.reviewer, &review_task, &rt, &mailbox)
128 .await
129 .map_err(|e| {
130 MultiError::AgentFailed(
131 self.reviewer.name.clone(),
132 format!("adversarial review failed: {}", e),
133 )
134 })?;
135 infra.record_output(&output);
136
137 let findings = Self::parse_findings(&output.answer);
139 let blocker_count = findings.iter().filter(|f| f.severity == "blocker").count();
140 let passed = if self.fail_on_blockers {
141 blocker_count == 0 && findings.iter().all(|f| f.passed || f.severity != "major")
142 } else {
143 findings.iter().filter(|f| f.passed).count() > findings.len() / 2
144 };
145
146 Ok(AdversarialReviewResult {
147 spec: criteria_text,
148 passed,
149 findings,
150 reviewer_output: output,
151 blocker_count,
152 })
153 }
154
155 fn parse_findings(response: &str) -> Vec<ReviewFinding> {
156 if let Some(json_str) = car_ir::json_extract::extract_json_object(response) {
158 if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&json_str) {
159 if let Some(findings) = parsed.get("findings").and_then(|f| f.as_array()) {
160 return findings
161 .iter()
162 .filter_map(|f| {
163 Some(ReviewFinding {
164 criterion: f.get("criterion")?.as_str()?.to_string(),
165 passed: f.get("passed")?.as_bool()?,
166 evidence: f.get("evidence")?.as_str()?.to_string(),
167 severity: f
168 .get("severity")
169 .and_then(|s| s.as_str())
170 .unwrap_or("major")
171 .to_string(),
172 })
173 })
174 .collect();
175 }
176 }
177 }
178 vec![ReviewFinding {
180 criterion: "overall".to_string(),
181 passed: response.to_lowercase().contains("pass"),
182 evidence: response.to_string(),
183 severity: "major".to_string(),
184 }]
185 }
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// A fenced JSON reply yields one finding per entry of the `findings` array.
    #[test]
    fn parse_findings_from_json() {
        let reply = r#"```json
{
  "passed": false,
  "findings": [
    {"criterion": "all tests pass", "passed": true, "evidence": "cargo test: 50 passed", "severity": "info"},
    {"criterion": "no hardcoded secrets", "passed": false, "evidence": "src/config.rs:42 contains API key", "severity": "blocker"}
  ]
}
```"#;
        let parsed = AdversarialReview::parse_findings(reply);
        let pass_flags: Vec<bool> = parsed.iter().map(|finding| finding.passed).collect();

        // Comparing the whole vector checks both the count and each verdict.
        assert_eq!(pass_flags, vec![true, false]);
        assert_eq!(parsed[1].severity, "blocker");
    }

    /// A plain-text reply falls back to a single synthetic finding.
    #[test]
    fn parse_findings_fallback() {
        let parsed = AdversarialReview::parse_findings("This looks good overall. PASS.");

        assert_eq!(parsed.len(), 1);
        assert!(parsed[0].passed);
    }
}