car-multi 0.27.0

Multi-agent coordination patterns for Common Agent Runtime
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
//! AdversarialReview — fresh agent reviews work against a spec.
//!
//! Key property: the reviewer gets NO prior context from the author.
//! It receives only the work output and the acceptance criteria, then
//! evaluates pass/fail with evidence (file:line references).
//!
//! Inspired by metaswarm's 4-phase execution loop where adversarial
//! reviewers are ALWAYS fresh Task() instances — never teammates,
//! never resumed, never given prior context.

use crate::error::MultiError;
use crate::mailbox::Mailbox;
use crate::runner::AgentRunner;
use crate::shared::SharedInfra;
use crate::types::{AgentOutput, AgentSpec};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::instrument;

/// Severity label carried by the synthetic finding that is emitted when the
/// reviewer's response can't be parsed into a verdict.
///
/// It is deliberately distinct from the real severities ("blocker", "major",
/// "minor", "info") so consumers can treat such a result as "not
/// adversarially verified" rather than approve or reject on it. (car#359)
pub const SEVERITY_INCONCLUSIVE: &str = "inconclusive";

/// Cap on how much of an unparseable reviewer response is carried as
/// evidence in the inconclusive finding.
///
/// The limit is compared against the UTF-8 byte length of the trimmed
/// response, not its character count. The raw response was previously dumped
/// verbatim, producing multi-KB findings (car#359).
const INCONCLUSIVE_EVIDENCE_CAP: usize = 1024;

/// A single review criterion with pass/fail and evidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewFinding {
    /// Text of the criterion this finding evaluates. Synthetic findings built
    /// by the parser (no per-criterion list available) use "overall".
    pub criterion: String,
    /// Whether the criterion was judged to be met.
    pub passed: bool,
    /// Evidence supporting the judgement, as reported by the reviewer (or a
    /// bounded slice of the raw response for an inconclusive finding).
    pub evidence: String,
    /// Severity label: "blocker", "major", "minor", "info", or
    /// "inconclusive" (see [`SEVERITY_INCONCLUSIVE`]).
    pub severity: String,
}

/// Result of an adversarial review.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdversarialReviewResult {
    /// The spec being reviewed against: the acceptance criteria rendered as
    /// a numbered list ("1. …", "2. …"), one criterion per line.
    pub spec: String,
    /// Overall pass/fail, computed from `findings` according to the review's
    /// `fail_on_blockers` setting; always `false` when `inconclusive` is set.
    pub passed: bool,
    /// Per-criterion findings.
    pub findings: Vec<ReviewFinding>,
    /// The reviewer's raw output.
    pub reviewer_output: AgentOutput,
    /// Number of findings whose severity is "blocker" (counted regardless of
    /// the finding's own `passed` flag).
    pub blocker_count: usize,
    /// True when the reviewer's response could not be parsed into a
    /// verdict. In this state `passed` is forced to `false` (fail-closed)
    /// and `findings` holds a single [`SEVERITY_INCONCLUSIVE`] finding.
    /// Consumers should treat an inconclusive result as "not
    /// adversarially verified" — surface it, don't silently approve or
    /// reject on it. (car#359)
    pub inconclusive: bool,
}

/// Configuration for adversarial review.
pub struct AdversarialReview {
    /// The reviewer agent spec. Must be a different agent from the author
    /// (this is a caller obligation; nothing in this type enforces it).
    pub reviewer: AgentSpec,
    /// The acceptance criteria / spec to review against. They are presented
    /// to the reviewer as a numbered list in the order given here.
    pub criteria: Vec<String>,
    /// Whether blockers auto-fail the review.
    ///
    /// When `true`, the review passes only if no finding has severity
    /// "blocker" and no failed finding has severity "major". When `false`,
    /// the review passes when a strict majority of findings passed.
    pub fail_on_blockers: bool,
}

impl AdversarialReview {
    /// Build a review configuration for `reviewer` against `criteria`.
    ///
    /// `fail_on_blockers` starts out `true`; set the field directly to switch
    /// to majority-vote scoring.
    pub fn new(reviewer: AgentSpec, criteria: Vec<String>) -> Self {
        Self {
            reviewer,
            criteria,
            fail_on_blockers: true,
        }
    }

    /// Run an adversarial review of the given work output.
    ///
    /// The reviewer is always a fresh agent with NO context from the author.
    /// It only sees: the work output and the acceptance criteria. It is run
    /// with a new default [`Mailbox`] and a runtime obtained from
    /// `infra.make_runtime()`.
    ///
    /// The overall verdict is derived from the parsed findings:
    /// - an unparseable (inconclusive) response never passes;
    /// - with `fail_on_blockers`, the review passes only when no finding has
    ///   severity "blocker" and no failed finding has severity "major";
    /// - otherwise it passes when a strict majority of findings passed.
    ///
    /// # Errors
    ///
    /// - [`MultiError::BudgetExhausted`] when `infra.begin_agent()` refuses
    ///   to start the reviewer.
    /// - [`MultiError::AgentFailed`] when the runner returns an error for the
    ///   reviewer agent.
    #[instrument(name = "multi.adversarial_review", skip_all)]
    pub async fn run(
        &self,
        work_output: &str,
        runner: &Arc<dyn AgentRunner>,
        infra: &SharedInfra,
    ) -> Result<AdversarialReviewResult, MultiError> {
        // Render the criteria as a 1-based numbered list; this text is both
        // embedded in the prompt and returned as the result's `spec`.
        let criteria_text = self
            .criteria
            .iter()
            .enumerate()
            .map(|(i, c)| format!("{}. {}", i + 1, c))
            .collect::<Vec<_>>()
            .join("\n");

        let review_task = format!(
            r#"You are an adversarial reviewer. Your job is to find problems.

## Acceptance Criteria
{criteria}

## Work Output to Review
{work}

## Instructions
Evaluate the work output against EACH acceptance criterion above.
For each criterion, determine if it PASSES or FAILS. Provide specific evidence
(file paths, line numbers, code snippets, or direct quotes from the output).

Be strict. If a criterion is ambiguous, assume it should be fully met.
Flag anything suspicious as a "blocker" or "major" finding.

Respond with a JSON object:
```json
{{
  "passed": true/false,
  "findings": [
    {{
      "criterion": "criterion text",
      "passed": true/false,
      "evidence": "specific evidence with file:line references",
      "severity": "blocker|major|minor|info"
    }}
  ]
}}
```"#,
            criteria = criteria_text,
            work = work_output,
        );

        // Budget gate the reviewer. The review is a single agent and its whole
        // purpose; if the budget can't afford it, surface that rather than
        // returning a misleading empty pass/fail.
        infra
            .begin_agent()
            .map_err(|e| MultiError::BudgetExhausted(e.to_string()))?;

        let mailbox = Mailbox::default();
        let rt = infra.make_runtime();
        let output = runner
            .run(&self.reviewer, &review_task, &rt, &mailbox)
            .await
            .map_err(|e| {
                MultiError::AgentFailed(
                    self.reviewer.name.clone(),
                    format!("adversarial review failed: {}", e),
                )
            })?;
        infra.record_output(&output);

        // Parse the review response. Severities arrive normalized (trimmed,
        // lowercase) from `parse_findings`, so the exact-match comparisons
        // below are safe against "Blocker" / "MAJOR" style answers.
        let parsed = Self::parse_findings(&output.answer);
        let findings = parsed.findings;
        let blocker_count = findings.iter().filter(|f| f.severity == "blocker").count();
        let passed = if parsed.inconclusive {
            // The review couldn't be parsed into a verdict. Don't guess —
            // an unverified review is not a pass. (car#359)
            false
        } else if self.fail_on_blockers {
            // Any blocker-severity finding fails the review; a major-severity
            // finding fails it only when that finding itself did not pass.
            blocker_count == 0 && findings.iter().all(|f| f.passed || f.severity != "major")
        } else {
            // Strict majority: more than half of the findings must pass.
            findings.iter().filter(|f| f.passed).count() > findings.len() / 2
        };

        Ok(AdversarialReviewResult {
            spec: criteria_text,
            passed,
            findings,
            reviewer_output: output,
            blocker_count,
            inconclusive: parsed.inconclusive,
        })
    }

    /// Parse a reviewer response into findings.
    ///
    /// Resolution order:
    /// 1. a JSON object with at least one well-formed entry in `findings`
    ///    yields those findings;
    /// 2. otherwise a JSON object with a top-level boolean `passed` yields a
    ///    single synthetic "overall" finding carrying that verdict;
    /// 3. otherwise the result is inconclusive.
    ///
    /// Each finding's `severity` is trimmed and ASCII-lowercased so that the
    /// gate's exact comparisons against "blocker" / "major" cannot be bypassed
    /// by a reviewer answering "Blocker" or " MAJOR ". A missing, non-string,
    /// or blank severity defaults to "major".
    ///
    /// Returns `inconclusive: true` when no structured verdict could be
    /// extracted — the previous behavior of guessing pass/fail from the
    /// substring `"pass"` fails open (review prompts and model chatter
    /// routinely contain "passes"), so an unparseable review is reported
    /// as inconclusive instead, for the caller to treat as unverified.
    /// (car#359)
    fn parse_findings(response: &str) -> ParsedReview {
        // Try to extract JSON
        if let Some(json_str) = car_ir::json_extract::extract_json_object(response) {
            if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&json_str) {
                // NOTE(review): entries missing `criterion`, `passed`, or
                // `evidence` (or carrying the wrong JSON type) are skipped
                // silently, so a malformed blocker entry cannot fail the
                // review. Consider surfacing dropped entries.
                let findings: Vec<ReviewFinding> = parsed
                    .get("findings")
                    .and_then(|f| f.as_array())
                    .map(|arr| {
                        arr.iter()
                            .filter_map(|f| {
                                Some(ReviewFinding {
                                    criterion: f.get("criterion")?.as_str()?.to_string(),
                                    passed: f.get("passed")?.as_bool()?,
                                    evidence: f.get("evidence")?.as_str()?.to_string(),
                                    // Normalize so downstream exact matches on
                                    // "blocker"/"major" are case- and
                                    // whitespace-insensitive; blank counts as
                                    // missing and falls back to "major".
                                    severity: f
                                        .get("severity")
                                        .and_then(|s| s.as_str())
                                        .map(|s| s.trim().to_ascii_lowercase())
                                        .filter(|s| !s.is_empty())
                                        .unwrap_or_else(|| "major".to_string()),
                                })
                            })
                            .collect()
                    })
                    .unwrap_or_default();
                if !findings.is_empty() {
                    return ParsedReview {
                        findings,
                        inconclusive: false,
                    };
                }
                // No usable per-criterion findings, but an explicit
                // top-level `passed` boolean is still a conclusive verdict —
                // honor it (a reviewer that finds nothing wrong legitimately
                // returns `{"passed": true, "findings": []}`) rather than
                // discarding the one piece of structure the prompt asked for.
                if let Some(passed) = parsed.get("passed").and_then(|p| p.as_bool()) {
                    return ParsedReview {
                        findings: vec![ReviewFinding {
                            criterion: "overall".to_string(),
                            passed,
                            evidence: if passed {
                                "reviewer reported a clean pass with no per-criterion findings"
                            } else {
                                "reviewer reported failure with no per-criterion findings"
                            }
                            .to_string(),
                            severity: if passed { "info" } else { "major" }.to_string(),
                        }],
                        inconclusive: false,
                    };
                }
                // Parseable JSON but neither well-formed findings nor a
                // top-level verdict — as unverified as no JSON at all.
            }
        }
        // Fallback: the response carries no extractable verdict. Emit a
        // single inconclusive finding (fail-closed) with a bounded slice
        // of the raw response for diagnosis, never the whole thing.
        let mut evidence = String::from(
            "reviewer response could not be parsed into a verdict; treat as not verified. raw: ",
        );
        let raw = response.trim();
        if raw.len() > INCONCLUSIVE_EVIDENCE_CAP {
            // Slice on a char boundary to avoid panicking mid-codepoint.
            // Index 0 is always a boundary, so the walk-back terminates.
            let mut end = INCONCLUSIVE_EVIDENCE_CAP;
            while end > 0 && !raw.is_char_boundary(end) {
                end -= 1;
            }
            evidence.push_str(&raw[..end]);
            evidence.push_str("… [truncated]");
        } else {
            evidence.push_str(raw);
        }
        ParsedReview {
            findings: vec![ReviewFinding {
                criterion: "overall".to_string(),
                passed: false,
                evidence,
                severity: SEVERITY_INCONCLUSIVE.to_string(),
            }],
            inconclusive: true,
        }
    }
}

/// Outcome of [`AdversarialReview::parse_findings`]: the extracted
/// findings plus whether the response was unparseable (car#359).
struct ParsedReview {
    /// Findings extracted from the response; when `inconclusive` is set this
    /// is a single synthetic finding with [`SEVERITY_INCONCLUSIVE`] severity.
    findings: Vec<ReviewFinding>,
    /// True when no structured verdict could be extracted from the response.
    inconclusive: bool,
}

#[cfg(test)]
mod tests {
    use super::*;
    use car_engine::Runtime;

    /// A fenced JSON reply with well-formed findings is parsed entry by entry.
    #[test]
    fn parse_findings_from_json() {
        let response = r#"```json
{
  "passed": false,
  "findings": [
    {"criterion": "all tests pass", "passed": true, "evidence": "cargo test: 50 passed", "severity": "info"},
    {"criterion": "no hardcoded secrets", "passed": false, "evidence": "src/config.rs:42 contains API key", "severity": "blocker"}
  ]
}
```"#;
        let parsed = AdversarialReview::parse_findings(response);
        assert!(!parsed.inconclusive);
        assert_eq!(parsed.findings.len(), 2);
        assert!(parsed.findings[0].passed);
        assert!(!parsed.findings[1].passed);
        assert_eq!(parsed.findings[1].severity, "blocker");
    }

    #[test]
    fn unparseable_response_is_inconclusive_not_a_pass() {
        // The old behavior guessed pass from the substring "pass"; a
        // garbage review echoing "passes criterion" must NOT read as a pass.
        let response = "Sure! This work clearly passes criterion 1 and looks great.";
        let parsed = AdversarialReview::parse_findings(response);
        assert!(parsed.inconclusive, "no JSON verdict => inconclusive");
        assert_eq!(parsed.findings.len(), 1);
        assert!(!parsed.findings[0].passed, "must fail closed, not open");
        assert_eq!(parsed.findings[0].severity, SEVERITY_INCONCLUSIVE);
    }

    #[test]
    fn inconclusive_evidence_is_bounded() {
        let response = "x".repeat(INCONCLUSIVE_EVIDENCE_CAP * 4);
        let parsed = AdversarialReview::parse_findings(&response);
        assert!(parsed.inconclusive);
        // Prefix + capped slice + suffix — never the full multi-KB dump.
        assert!(parsed.findings[0].evidence.len() < INCONCLUSIVE_EVIDENCE_CAP + 128);
        assert!(parsed.findings[0].evidence.ends_with("… [truncated]"));
    }

    #[test]
    fn inconclusive_evidence_truncation_is_utf8_safe() {
        // U+20AC ('€') is 3 bytes in UTF-8: the cap (1024) is not a multiple
        // of 3, so byte 1024 lands mid-codepoint — exercises the
        // char-boundary walk-back, which must not panic (slicing
        // mid-codepoint would). Written as an escape so the literal cannot
        // be lost to encoding damage; an empty literal here would produce an
        // empty response and never reach the truncation path.
        let response = "\u{20AC}".repeat(INCONCLUSIVE_EVIDENCE_CAP);
        let parsed = AdversarialReview::parse_findings(&response);
        assert!(parsed.inconclusive);
        // Bounded (prefix + ≤cap slice + suffix) and truncated; valid UTF-8
        // by construction since `evidence` is a `String`.
        assert!(parsed.findings[0].evidence.len() < INCONCLUSIVE_EVIDENCE_CAP + 128);
        assert!(parsed.findings[0].evidence.ends_with("… [truncated]"));
    }

    #[test]
    fn empty_findings_with_explicit_pass_is_a_conclusive_pass() {
        // A reviewer that finds nothing wrong legitimately returns this —
        // it must NOT be marked inconclusive (car#359 review follow-up).
        let response = r#"{"passed": true, "findings": []}"#;
        let parsed = AdversarialReview::parse_findings(response);
        assert!(!parsed.inconclusive, "explicit top-level verdict is conclusive");
        assert_eq!(parsed.findings.len(), 1);
        assert!(parsed.findings[0].passed);
        assert_eq!(parsed.findings[0].severity, "info");
    }

    #[test]
    fn empty_findings_with_explicit_fail_is_a_conclusive_fail() {
        let response = r#"{"passed": false, "findings": []}"#;
        let parsed = AdversarialReview::parse_findings(response);
        assert!(!parsed.inconclusive);
        assert!(!parsed.findings[0].passed);
    }

    #[test]
    fn empty_findings_without_verdict_is_inconclusive() {
        // No usable findings AND no top-level boolean — genuinely unverified.
        let response = r#"{"summary": "looks fine", "findings": []}"#;
        let parsed = AdversarialReview::parse_findings(response);
        assert!(parsed.inconclusive);
        assert_eq!(parsed.findings[0].severity, SEVERITY_INCONCLUSIVE);
    }

    #[test]
    fn no_findings_key_and_no_verdict_is_inconclusive() {
        let response = r#"{"note": "I reviewed it"}"#;
        let parsed = AdversarialReview::parse_findings(response);
        assert!(parsed.inconclusive);
    }

    /// Reviewer that always returns the same fixed answer, to drive
    /// `run()` end-to-end and pin the gate-level fail-closed behavior.
    struct FixedReviewer(&'static str);

    #[async_trait::async_trait]
    impl AgentRunner for FixedReviewer {
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            _runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            Ok(AgentOutput {
                name: spec.name.clone(),
                answer: self.0.to_string(),
                turns: 1,
                tool_calls: 0,
                duration_ms: 1.0,
                error: None,
                outcome: None,
                tokens: None,
                tools_used: Vec::new(),
            })
        }
    }

    #[tokio::test]
    async fn run_forces_fail_closed_on_inconclusive() {
        // Garbage that echoes "passes" — the old substring heuristic would
        // have flipped this to passed=true.
        let runner: Arc<dyn AgentRunner> =
            Arc::new(FixedReviewer("Looks great, this passes everything!"));
        let infra = SharedInfra::new();
        let r = AdversarialReview::new(
            AgentSpec::new("reviewer", "review it"),
            vec!["criterion one".to_string()],
        )
        .run("some work output", &runner, &infra)
        .await
        .unwrap();
        assert!(r.inconclusive, "unparseable review must be inconclusive");
        assert!(!r.passed, "inconclusive must fail closed at the gate");
    }
}